mirror of http://192.168.1.51:8099/lmh188/twain3.0
2577 lines
93 KiB
C
2577 lines
93 KiB
C
/*====================================================================*
|
|
- Copyright (C) 2001 Leptonica. All rights reserved.
|
|
-
|
|
- Redistribution and use in source and binary forms, with or without
|
|
- modification, are permitted provided that the following conditions
|
|
- are met:
|
|
- 1. Redistributions of source code must retain the above copyright
|
|
- notice, this list of conditions and the following disclaimer.
|
|
- 2. Redistributions in binary form must reproduce the above
|
|
- copyright notice, this list of conditions and the following
|
|
- disclaimer in the documentation and/or other materials
|
|
- provided with the distribution.
|
|
-
|
|
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
|
|
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
- OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*====================================================================*/
|
|
|
|
/*
|
|
* jbclass.c
|
|
*
|
|
* These are functions for unsupervised classification of
|
|
* collections of connected components -- either characters or
|
|
* words -- in binary images. They can be used as image
|
|
* processing steps in jbig2 compression.
|
|
*
|
|
* Initialization
|
|
*
|
|
* JBCLASSER *jbRankHausInit() [rank hausdorff encoder]
|
|
* JBCLASSER *jbCorrelationInit() [correlation encoder]
|
|
* JBCLASSER *jbCorrelationInitWithoutComponents() [ditto]
|
|
* static JBCLASSER *jbCorrelationInitInternal()
|
|
*
|
|
* Classify the pages
|
|
*
|
|
* l_int32 jbAddPages()
|
|
* l_int32 jbAddPage()
|
|
* l_int32 jbAddPageComponents()
|
|
*
|
|
* Rank hausdorff classifier
|
|
*
|
|
* l_int32 jbClassifyRankHaus()
|
|
* l_int32 pixHaustest()
|
|
* l_int32 pixRankHaustest()
|
|
*
|
|
* Binary correlation classifier
|
|
*
|
|
* l_int32 jbClassifyCorrelation()
|
|
*
|
|
* Determine the image components we start with
|
|
*
|
|
* l_int32 jbGetComponents()
|
|
* l_int32 pixWordMaskByDilation()
|
|
* l_int32 pixWordBoxesByDilation()
|
|
*
|
|
* Build grayscale composites (templates)
|
|
*
|
|
* PIXA *jbAccumulateComposites
|
|
* PIXA *jbTemplatesFromComposites
|
|
*
|
|
* Utility functions for Classer
|
|
*
|
|
* JBCLASSER *jbClasserCreate()
|
|
* void jbClasserDestroy()
|
|
*
|
|
* Utility functions for Data
|
|
*
|
|
* JBDATA *jbDataSave()
|
|
* void jbDataDestroy()
|
|
* l_int32 jbDataWrite()
|
|
* JBDATA *jbDataRead()
|
|
* PIXA *jbDataRender()
|
|
* l_int32 jbGetULCorners()
|
|
* l_int32 jbGetLLCorners()
|
|
*
|
|
* Static helpers
|
|
*
|
|
* static JBFINDCTX *findSimilarSizedTemplatesInit()
|
|
* static l_int32 findSimilarSizedTemplatesNext()
|
|
* static void findSimilarSizedTemplatesDestroy()
|
|
* static l_int32 finalPositioningForAlignment()
|
|
*
|
|
* Note: this is NOT an implementation of the JPEG jbig2
|
|
* proposed standard encoder, the specifications for which
|
|
* can be found at http://www.jpeg.org/jbigpt2.html.
|
|
* (See below for a full implementation.)
|
|
* It is an implementation of the lower-level part of an encoder that:
|
|
*
|
|
* (1) identifies connected components that are going to be used
|
|
* (2) puts them in similarity classes (this is an unsupervised
|
|
* classifier), and
|
|
* (3) stores the result in a simple file format (2 files,
|
|
* one for templates and one for page/coordinate/template-index
|
|
* quartets).
|
|
*
|
|
* An actual implementation of the official jbig2 encoder could
|
|
* start with parts (1) and (2), and would then compress the quartets
|
|
* according to the standards requirements (e.g., Huffman or
|
|
* arithmetic coding of coordinate differences and image templates).
|
|
*
|
|
* The low-level part of the encoder provided here has the
|
|
* following useful features:
|
|
*
|
|
* ~ It is accurate in the identification of templates
|
|
* and classes because it uses a windowed hausdorff
|
|
* distance metric.
|
|
* ~ It is accurate in the placement of the connected
|
|
* components, doing a two step process of first aligning
|
|
* the the centroids of the template with those of each instance,
|
|
* and then making a further correction of up to +- 1 pixel
|
|
* in each direction to best align the templates.
|
|
* ~ It is fast because it uses a morphologically based
|
|
* matching algorithm to implement the hausdorff criterion,
|
|
* and it selects the patterns that are possible matches
|
|
* based on their size.
|
|
*
|
|
* We provide two different matching functions, one using Hausdorff
|
|
* distance and one using a simple image correlation.
|
|
* The Hausdorff method sometimes produces better results for the
|
|
* same number of classes, because it gives a relatively small
|
|
* effective weight to foreground pixels near the boundary,
|
|
* and a relatively large weight to foreground pixels that are
|
|
* not near the boundary. By effectively ignoring these boundary
|
|
* pixels, Hausdorff weighting corresponds better to the expected
|
|
* probabilities of the pixel values in a scanned image, where the
|
|
* variations in instances of the same printed character are much
|
|
* more likely to be in pixels near the boundary. By contrast,
|
|
* the correlation method gives equal weight to all foreground pixels.
|
|
*
|
|
* For best results, use the correlation method. Correlation takes
|
|
* the number of fg pixels in the AND of instance and template,
|
|
* divided by the product of the number of fg pixels in instance
|
|
* and template. It compares this with a threshold that, in
|
|
* general, depends on the fractional coverage of the template.
|
|
* For heavy text, the threshold is raised above that for light
|
|
* text, By using both these parameters (basic threshold and
|
|
* adjustment factor for text weight), one has more flexibility
|
|
* and can arrive at the fewest substitution errors, although
|
|
* this comes at the price of more templates.
|
|
*
|
|
* The strict Hausdorff scoring is not a rank weighting, because a
|
|
* single pixel beyond the given distance will cause a match
|
|
* failure. A rank Hausdorff is more robust to non-boundary noise,
|
|
* but it is also more susceptible to confusing components that
|
|
* should be in different classes. For implementing a jbig2
|
|
* application for visually lossless binary image compression,
|
|
* you have two choices:
|
|
*
|
|
* (1) use a 3x3 structuring element (size = 3) and a strict
|
|
* Hausdorff comparison (rank = 1.0 in the rank Hausdorff
|
|
* function). This will result in a minimal number of classes,
|
|
* but confusion of small characters, such as italic and
|
|
* non-italic lower-case 'o', can still occur.
|
|
* (2) use the correlation method with a threshold of 0.85
|
|
* and a weighting factor of about 0.7. This will result in
|
|
* a larger number of classes, but should not be confused
|
|
* either by similar small characters or by extremely
|
|
* thick sans serif characters, such as in prog/cootoots.png.
|
|
*
|
|
* As mentioned above, if visual substitution errors must be
|
|
* avoided, you should use the correlation method.
|
|
*
|
|
* We provide executables that show how to do the encoding:
|
|
* prog/jbrankhaus.c
|
|
* prog/jbcorrelation.c
|
|
*
|
|
* The basic flow for correlation classification goes as follows,
|
|
* where specific choices have been made for parameters (Hausdorff
|
|
* is the same except for initialization):
|
|
*
|
|
* // Initialize and save data in the classer
|
|
* JBCLASSER *classer =
|
|
* jbCorrelationInit(JB_CONN_COMPS, 0, 0, 0.8, 0.7);
|
|
* SARRAY *safiles = getSortedPathnamesInDirectory(directory,
|
|
* NULL, 0, 0);
|
|
* jbAddPages(classer, safiles);
|
|
*
|
|
* // Save the data in a data structure for serialization,
|
|
* // and write it into two files.
|
|
* JBDATA *data = jbDataSave(classer);
|
|
* jbDataWrite(rootname, data);
|
|
*
|
|
* // Reconstruct (render) the pages from the encoded data.
|
|
* PIXA *pixa = jbDataRender(data, FALSE);
|
|
*
|
|
* Adam Langley has built a jbig2 standards-compliant encoder, the
|
|
* first one to appear in open source. You can get this encoder at:
|
|
* http://www.imperialviolet.org/jbig2.html
|
|
*
|
|
* It uses arithmetic encoding throughout. It encodes binary images
|
|
* losslessly with a single arithmetic coding over the full image.
|
|
* It also does both lossy and lossless encoding from connected
|
|
* components, using leptonica to generate the templates representing
|
|
* each cluster.
|
|
*/
|
|
|
|
#include <string.h>
|
|
#include <math.h>
|
|
#include "allheaders.h"
|
|
|
|
//static const l_int32 L_BUF_SIZE = 512;
|
|
#define L_BUF_SIZE 512
|
|
|
|
/* For jbClassifyRankHaus(): size of border added around
|
|
* pix of each c.c., to allow further processing. This
|
|
* should be at least the sum of the MAX_DIFF_HEIGHT
|
|
* (or MAX_DIFF_WIDTH) and one-half the size of the Sel */
|
|
static const l_int32 JB_ADDED_PIXELS = 6;
|
|
|
|
/* For pixHaustest(), pixRankHaustest() and pixCorrelationScore():
|
|
* choose these to be 2 or greater */
|
|
static const l_int32 MAX_DIFF_WIDTH = 2; /* use at least 2 */
|
|
static const l_int32 MAX_DIFF_HEIGHT = 2; /* use at least 2 */
|
|
|
|
/* In initialization, you have the option to discard components
|
|
* (cc, characters or words) that have either width or height larger
|
|
* than a given size. This is convenient for jbDataSave(), because
|
|
* the components are placed onto a regular lattice with cell
|
|
* dimension equal to the maximum component size. The default
|
|
* values are given here. If you want to save all components,
|
|
* use a sufficiently large set of dimensions. */
|
|
static const l_int32 MAX_CONN_COMP_WIDTH = 350; /* default max cc width */
|
|
static const l_int32 MAX_CHAR_COMP_WIDTH = 350; /* default max char width */
|
|
static const l_int32 MAX_WORD_COMP_WIDTH = 1000; /* default max word width */
|
|
static const l_int32 MAX_COMP_HEIGHT = 120; /* default max component height */
|
|
|
|
/* This stores the state of a state machine which fetches
|
|
* similar sized templates */
|
|
struct JbFindTemplatesState
|
|
{
|
|
JBCLASSER *classer; /* classer */
|
|
l_int32 w; /* desired width */
|
|
l_int32 h; /* desired height */
|
|
l_int32 i; /* index into two_by_two step array */
|
|
L_DNA *dna; /* current number array */
|
|
l_int32 n; /* current element of dna */
|
|
};
|
|
typedef struct JbFindTemplatesState JBFINDCTX;
|
|
|
|
/* Static initialization function */
|
|
static JBCLASSER * jbCorrelationInitInternal(l_int32 components,
|
|
l_int32 maxwidth, l_int32 maxheight, l_float32 thresh,
|
|
l_float32 weightfactor, l_int32 keep_components);
|
|
|
|
/* Static helper functions */
|
|
static JBFINDCTX * findSimilarSizedTemplatesInit(JBCLASSER *classer, PIX *pixs);
|
|
static l_int32 findSimilarSizedTemplatesNext(JBFINDCTX *context);
|
|
static void findSimilarSizedTemplatesDestroy(JBFINDCTX **pcontext);
|
|
static l_int32 finalPositioningForAlignment(PIX *pixs, l_int32 x, l_int32 y,
|
|
l_int32 idelx, l_int32 idely, PIX *pixt,
|
|
l_int32 *sumtab, l_int32 *pdx, l_int32 *pdy);
|
|
|
|
#ifndef NO_CONSOLE_IO
|
|
#define DEBUG_CORRELATION_SCORE 0
|
|
#endif /* ~NO_CONSOLE_IO */
|
|
|
|
|
|
/*----------------------------------------------------------------------*
|
|
* Initialization *
|
|
*----------------------------------------------------------------------*/
|
|
/*!
|
|
* \brief jbRankHausInit()
|
|
*
|
|
* \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
|
|
* \param[in] maxwidth of component; use 0 for default
|
|
* \param[in] maxheight of component; use 0 for default
|
|
* \param[in] size of square structuring element; 2, representing
|
|
* 2x2 sel, is necessary for reasonable accuracy of
|
|
* small components; combine this with rank ~ 0.97
|
|
* to avoid undue class expansion
|
|
* \param[in] rank rank val of match, each way; in [0.5 - 1.0];
|
|
* when using size = 2, 0.97 is a reasonable value
|
|
* \return jbclasser if OK; NULL on error
|
|
*/
|
|
JBCLASSER *
|
|
jbRankHausInit(l_int32 components,
|
|
l_int32 maxwidth,
|
|
l_int32 maxheight,
|
|
l_int32 size,
|
|
l_float32 rank)
|
|
{
|
|
JBCLASSER *classer;
|
|
|
|
PROCNAME("jbRankHausInit");
|
|
|
|
if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
|
|
components != JB_WORDS)
|
|
return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL);
|
|
if (size < 1 || size > 10)
|
|
return (JBCLASSER *)ERROR_PTR("size not reasonable", procName, NULL);
|
|
if (rank < 0.5 || rank > 1.0)
|
|
return (JBCLASSER *)ERROR_PTR("rank not in [0.5-1.0]", procName, NULL);
|
|
if (maxwidth == 0) {
|
|
if (components == JB_CONN_COMPS)
|
|
maxwidth = MAX_CONN_COMP_WIDTH;
|
|
else if (components == JB_CHARACTERS)
|
|
maxwidth = MAX_CHAR_COMP_WIDTH;
|
|
else /* JB_WORDS */
|
|
maxwidth = MAX_WORD_COMP_WIDTH;
|
|
}
|
|
if (maxheight == 0)
|
|
maxheight = MAX_COMP_HEIGHT;
|
|
|
|
if ((classer = jbClasserCreate(JB_RANKHAUS, components)) == NULL)
|
|
return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL);
|
|
classer->maxwidth = maxwidth;
|
|
classer->maxheight = maxheight;
|
|
classer->sizehaus = size;
|
|
classer->rankhaus = rank;
|
|
classer->dahash = l_dnaHashCreate(5507, 4); /* 5507 is prime */
|
|
classer->keep_pixaa = 1; /* keep all components in pixaa */
|
|
return classer;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief jbCorrelationInit()
|
|
*
|
|
* \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
|
|
* \param[in] maxwidth of component; use 0 for default
|
|
* \param[in] maxheight of component; use 0 for default
|
|
* \param[in] thresh value for correlation score: in [0.4 - 0.98]
|
|
* \param[in] weightfactor corrects thresh for thick characters [0.0 - 1.0]
|
|
* \return jbclasser if OK; NULL on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) For scanned text, suggested input values are:
|
|
* thresh ~ [0.8 - 0.85]
|
|
* weightfactor ~ [0.5 - 0.6]
|
|
* (2) For electronically generated fonts (e.g., rasterized pdf),
|
|
* a very high thresh (e.g., 0.95) will not cause a significant
|
|
* increase in the number of classes.
|
|
* </pre>
|
|
*/
|
|
JBCLASSER *
|
|
jbCorrelationInit(l_int32 components,
|
|
l_int32 maxwidth,
|
|
l_int32 maxheight,
|
|
l_float32 thresh,
|
|
l_float32 weightfactor)
|
|
{
|
|
return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh,
|
|
weightfactor, 1);
|
|
}
|
|
|
|
/*!
|
|
* \brief jbCorrelationInitWithoutComponents()
|
|
*
|
|
* \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
|
|
* \param[in] maxwidth of component; use 0 for default
|
|
* \param[in] maxheight of component; use 0 for default
|
|
* \param[in] thresh value for correlation score: in [0.4 - 0.98]
|
|
* \param[in] weightfactor corrects thresh for thick characters [0.0 - 1.0]
|
|
* \return jbclasser if OK; NULL on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* Acts the same as jbCorrelationInit(), but the resulting
|
|
* object doesn't keep a list of all the components.
|
|
* </pre>
|
|
*/
|
|
JBCLASSER *
|
|
jbCorrelationInitWithoutComponents(l_int32 components,
|
|
l_int32 maxwidth,
|
|
l_int32 maxheight,
|
|
l_float32 thresh,
|
|
l_float32 weightfactor)
|
|
{
|
|
return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh,
|
|
weightfactor, 0);
|
|
}
|
|
|
|
|
|
static JBCLASSER *
|
|
jbCorrelationInitInternal(l_int32 components,
|
|
l_int32 maxwidth,
|
|
l_int32 maxheight,
|
|
l_float32 thresh,
|
|
l_float32 weightfactor,
|
|
l_int32 keep_components)
|
|
{
|
|
JBCLASSER *classer;
|
|
|
|
PROCNAME("jbCorrelationInitInternal");
|
|
|
|
if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
|
|
components != JB_WORDS)
|
|
return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL);
|
|
if (thresh < 0.4 || thresh > 0.98)
|
|
return (JBCLASSER *)ERROR_PTR("thresh not in range [0.4 - 0.98]",
|
|
procName, NULL);
|
|
if (weightfactor < 0.0 || weightfactor > 1.0)
|
|
return (JBCLASSER *)ERROR_PTR("weightfactor not in range [0.0 - 1.0]",
|
|
procName, NULL);
|
|
if (maxwidth == 0) {
|
|
if (components == JB_CONN_COMPS)
|
|
maxwidth = MAX_CONN_COMP_WIDTH;
|
|
else if (components == JB_CHARACTERS)
|
|
maxwidth = MAX_CHAR_COMP_WIDTH;
|
|
else /* JB_WORDS */
|
|
maxwidth = MAX_WORD_COMP_WIDTH;
|
|
}
|
|
if (maxheight == 0)
|
|
maxheight = MAX_COMP_HEIGHT;
|
|
|
|
|
|
if ((classer = jbClasserCreate(JB_CORRELATION, components)) == NULL)
|
|
return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL);
|
|
classer->maxwidth = maxwidth;
|
|
classer->maxheight = maxheight;
|
|
classer->thresh = thresh;
|
|
classer->weightfactor = weightfactor;
|
|
classer->dahash = l_dnaHashCreate(5507, 4); /* 5507 is prime */
|
|
classer->keep_pixaa = keep_components;
|
|
return classer;
|
|
}
|
|
|
|
|
|
/*----------------------------------------------------------------------*
|
|
* Classify the pages *
|
|
*----------------------------------------------------------------------*/
|
|
/*!
|
|
* \brief jbAddPages()
|
|
*
|
|
* \param[in] jbclasser
|
|
* \param[in] safiles of page image file names
|
|
* \return 0 if OK; 1 on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) jbclasser makes a copy of the array of file names.
|
|
* (2) The caller is still responsible for destroying the input array.
|
|
* </pre>
|
|
*/
|
|
l_ok
|
|
jbAddPages(JBCLASSER *classer,
|
|
SARRAY *safiles)
|
|
{
|
|
l_int32 i, nfiles;
|
|
char *fname;
|
|
PIX *pix;
|
|
|
|
PROCNAME("jbAddPages");
|
|
|
|
if (!classer)
|
|
return ERROR_INT("classer not defined", procName, 1);
|
|
if (!safiles)
|
|
return ERROR_INT("safiles not defined", procName, 1);
|
|
|
|
classer->safiles = sarrayCopy(safiles);
|
|
nfiles = sarrayGetCount(safiles);
|
|
for (i = 0; i < nfiles; i++) {
|
|
fname = sarrayGetString(safiles, i, L_NOCOPY);
|
|
if ((pix = pixRead(fname)) == NULL) {
|
|
L_WARNING("image file %d not read\n", procName, i);
|
|
continue;
|
|
}
|
|
if (pixGetDepth(pix) != 1) {
|
|
L_WARNING("image file %d not 1 bpp\n", procName, i);
|
|
continue;
|
|
}
|
|
jbAddPage(classer, pix);
|
|
pixDestroy(&pix);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief jbAddPage()
|
|
*
|
|
* \param[in] jbclasser
|
|
* \param[in] pixs input page
|
|
* \return 0 if OK; 1 on error
|
|
*/
|
|
l_ok
|
|
jbAddPage(JBCLASSER *classer,
|
|
PIX *pixs)
|
|
{
|
|
BOXA *boxas;
|
|
PIXA *pixas;
|
|
|
|
PROCNAME("jbAddPage");
|
|
|
|
if (!classer)
|
|
return ERROR_INT("classer not defined", procName, 1);
|
|
if (!pixs || pixGetDepth(pixs) != 1)
|
|
return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
|
|
|
|
classer->w = pixGetWidth(pixs);
|
|
classer->h = pixGetHeight(pixs);
|
|
|
|
/* Get the appropriate components and their bounding boxes */
|
|
if (jbGetComponents(pixs, classer->components, classer->maxwidth,
|
|
classer->maxheight, &boxas, &pixas)) {
|
|
return ERROR_INT("components not made", procName, 1);
|
|
}
|
|
|
|
jbAddPageComponents(classer, pixs, boxas, pixas);
|
|
boxaDestroy(&boxas);
|
|
pixaDestroy(&pixas);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief jbAddPageComponents()
|
|
*
|
|
* \param[in] jbclasser
|
|
* \param[in] pixs input page
|
|
* \param[in] boxas b.b. of components for this page
|
|
* \param[in] pixas components for this page
|
|
* \return 0 if OK; 1 on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) If there are no components on the page, we don't require input
|
|
* of empty boxas or pixas, although that's the typical situation.
|
|
* </pre>
|
|
*/
|
|
l_ok
|
|
jbAddPageComponents(JBCLASSER *classer,
|
|
PIX *pixs,
|
|
BOXA *boxas,
|
|
PIXA *pixas)
|
|
{
|
|
l_int32 n;
|
|
|
|
PROCNAME("jbAddPageComponents");
|
|
|
|
if (!classer)
|
|
return ERROR_INT("classer not defined", procName, 1);
|
|
if (!pixs)
|
|
return ERROR_INT("pix not defined", procName, 1);
|
|
|
|
/* Test for no components on the current page. Always update the
|
|
* number of pages processed, even if nothing is on it. */
|
|
if (!boxas || !pixas || (boxaGetCount(boxas) == 0)) {
|
|
classer->npages++;
|
|
return 0;
|
|
}
|
|
|
|
/* Get classes. For hausdorff, it uses a specified size of
|
|
* structuring element and specified rank. For correlation,
|
|
* it uses a specified threshold. */
|
|
if (classer->method == JB_RANKHAUS) {
|
|
if (jbClassifyRankHaus(classer, boxas, pixas))
|
|
return ERROR_INT("rankhaus classification failed", procName, 1);
|
|
} else { /* classer->method == JB_CORRELATION */
|
|
if (jbClassifyCorrelation(classer, boxas, pixas))
|
|
return ERROR_INT("correlation classification failed", procName, 1);
|
|
}
|
|
|
|
/* Find the global UL corners, adjusted for each instance so
|
|
* that the class template and instance will have their
|
|
* centroids in the same place. Then the template can be
|
|
* used to replace the instance. */
|
|
if (jbGetULCorners(classer, pixs, boxas))
|
|
return ERROR_INT("UL corners not found", procName, 1);
|
|
|
|
/* Update total component counts and number of pages processed. */
|
|
n = boxaGetCount(boxas);
|
|
classer->baseindex += n;
|
|
numaAddNumber(classer->nacomps, n);
|
|
classer->npages++;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*----------------------------------------------------------------------*
|
|
* Classification using windowed rank hausdorff metric *
|
|
*----------------------------------------------------------------------*/
|
|
/*!
|
|
* \brief jbClassifyRankHaus()
|
|
*
|
|
* \param[in] jbclasser
|
|
* \param[in] boxa new components for classification
|
|
* \param[in] pixas new components for classification
|
|
* \return 0 if OK; 1 on error
|
|
*/
|
|
l_ok
|
|
jbClassifyRankHaus(JBCLASSER *classer,
|
|
BOXA *boxa,
|
|
PIXA *pixas)
|
|
{
|
|
l_int32 n, nt, i, wt, ht, iclass, size, found, testval;
|
|
l_int32 npages, area1, area3;
|
|
l_int32 *tab8;
|
|
l_float32 rank, x1, y1, x2, y2;
|
|
BOX *box;
|
|
NUMA *naclass, *napage;
|
|
NUMA *nafg; /* fg area of all instances */
|
|
NUMA *nafgt; /* fg area of all templates */
|
|
JBFINDCTX *findcontext;
|
|
L_DNAHASH *dahash;
|
|
PIX *pix, *pix1, *pix2, *pix3, *pix4;
|
|
PIXA *pixa, *pixa1, *pixa2, *pixat, *pixatd;
|
|
PIXAA *pixaa;
|
|
PTA *pta, *ptac, *ptact;
|
|
SEL *sel;
|
|
|
|
PROCNAME("jbClassifyRankHaus");
|
|
|
|
if (!classer)
|
|
return ERROR_INT("classer not defined", procName, 1);
|
|
if (!boxa)
|
|
return ERROR_INT("boxa not defined", procName, 1);
|
|
if (!pixas)
|
|
return ERROR_INT("pixas not defined", procName, 1);
|
|
if ((n = pixaGetCount(pixas)) == 0)
|
|
return ERROR_INT("pixas is empty", procName, 1);
|
|
if ((nafg = pixaCountPixels(pixas)) == NULL) /* areas for this page */
|
|
return ERROR_INT("fg counting failed", procName, 1);
|
|
|
|
npages = classer->npages;
|
|
size = classer->sizehaus;
|
|
sel = selCreateBrick(size, size, size / 2, size / 2, SEL_HIT);
|
|
|
|
/* Generate the bordered pixa, with and without dilation.
|
|
* pixa1 and pixa2 contain all the input components. */
|
|
pixa1 = pixaCreate(n);
|
|
pixa2 = pixaCreate(n);
|
|
for (i = 0; i < n; i++) {
|
|
pix = pixaGetPix(pixas, i, L_CLONE);
|
|
pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS,
|
|
JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0);
|
|
pix2 = pixDilate(NULL, pix1, sel);
|
|
pixaAddPix(pixa1, pix1, L_INSERT); /* un-dilated */
|
|
pixaAddPix(pixa2, pix2, L_INSERT); /* dilated */
|
|
pixDestroy(&pix);
|
|
}
|
|
|
|
/* Get the centroids of all the bordered images.
|
|
* These are relative to the UL corner of each (bordered) pix. */
|
|
pta = pixaCentroids(pixa1); /* centroids for this page; use here */
|
|
ptac = classer->ptac; /* holds centroids of components up to this page */
|
|
ptaJoin(ptac, pta, 0, -1); /* save centroids of all components */
|
|
ptact = classer->ptact; /* holds centroids of templates */
|
|
|
|
/* Use these to save the class and page of each component. */
|
|
naclass = classer->naclass;
|
|
napage = classer->napage;
|
|
|
|
/* Store the unbordered pix in a pixaa, in a hierarchical
|
|
* set of arrays. There is one pixa for each class,
|
|
* and the pix in each pixa are all the instances found
|
|
* of that class. This is actually more than one would need
|
|
* for a jbig2 encoder, but there are two reasons to keep
|
|
* them around: (1) the set of instances for each class
|
|
* can be used to make an improved binary (or, better,
|
|
* a grayscale) template, rather than simply using the first
|
|
* one in the set; (2) we can investigate the failures
|
|
* of the classifier. This pixaa grows as we process
|
|
* successive pages. */
|
|
pixaa = classer->pixaa;
|
|
|
|
/* arrays to store class exemplars (templates) */
|
|
pixat = classer->pixat; /* un-dilated */
|
|
pixatd = classer->pixatd; /* dilated */
|
|
|
|
/* Fill up the pixaa tree with the template exemplars as
|
|
* the first pix in each pixa. As we add each pix,
|
|
* we also add the associated box to the pixa.
|
|
* We also keep track of the centroid of each pix,
|
|
* and use the difference between centroids (of the
|
|
* pix with the exemplar we are checking it with)
|
|
* to align the two when checking that the Hausdorff
|
|
* distance does not exceed a threshold.
|
|
* The threshold is set by the Sel used for dilating.
|
|
* For example, a 3x3 brick, sel_3, corresponds to a
|
|
* Hausdorff distance of 1. In general, for an NxN brick,
|
|
* with N odd, corresponds to a Hausdorff distance of (N - 1)/2.
|
|
* It turns out that we actually need to use a sel of size 2x2
|
|
* to avoid small bad components when there is a halftone image
|
|
* from which components can be chosen.
|
|
* The larger the Sel you use, the fewer the number of classes,
|
|
* and the greater the likelihood of putting semantically
|
|
* different objects in the same class. For simplicity,
|
|
* we do this separately for the case of rank == 1.0 (exact
|
|
* match within the Hausdorff distance) and rank < 1.0. */
|
|
rank = classer->rankhaus;
|
|
dahash = classer->dahash;
|
|
if (rank == 1.0) {
|
|
for (i = 0; i < n; i++) {
|
|
pix1 = pixaGetPix(pixa1, i, L_CLONE);
|
|
pix2 = pixaGetPix(pixa2, i, L_CLONE);
|
|
ptaGetPt(pta, i, &x1, &y1);
|
|
nt = pixaGetCount(pixat); /* number of templates */
|
|
found = FALSE;
|
|
findcontext = findSimilarSizedTemplatesInit(classer, pix1);
|
|
while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
|
|
/* Find score for this template */
|
|
pix3 = pixaGetPix(pixat, iclass, L_CLONE);
|
|
pix4 = pixaGetPix(pixatd, iclass, L_CLONE);
|
|
ptaGetPt(ptact, iclass, &x2, &y2);
|
|
testval = pixHaustest(pix1, pix2, pix3, pix4, x1 - x2, y1 - y2,
|
|
MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT);
|
|
pixDestroy(&pix3);
|
|
pixDestroy(&pix4);
|
|
if (testval == 1) {
|
|
found = TRUE;
|
|
numaAddNumber(naclass, iclass);
|
|
numaAddNumber(napage, npages);
|
|
if (classer->keep_pixaa) {
|
|
pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
|
|
pix = pixaGetPix(pixas, i, L_CLONE);
|
|
pixaAddPix(pixa, pix, L_INSERT);
|
|
box = boxaGetBox(boxa, i, L_CLONE);
|
|
pixaAddBox(pixa, box, L_INSERT);
|
|
pixaDestroy(&pixa);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
findSimilarSizedTemplatesDestroy(&findcontext);
|
|
if (found == FALSE) { /* new class */
|
|
numaAddNumber(naclass, nt);
|
|
numaAddNumber(napage, npages);
|
|
pixa = pixaCreate(0);
|
|
pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */
|
|
pixaAddPix(pixa, pix, L_INSERT);
|
|
wt = pixGetWidth(pix);
|
|
ht = pixGetHeight(pix);
|
|
l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt);
|
|
box = boxaGetBox(boxa, i, L_CLONE);
|
|
pixaAddBox(pixa, box, L_INSERT);
|
|
pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */
|
|
ptaAddPt(ptact, x1, y1);
|
|
pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */
|
|
pixaAddPix(pixatd, pix2, L_INSERT); /* bordered dil template */
|
|
} else { /* don't save them */
|
|
pixDestroy(&pix1);
|
|
pixDestroy(&pix2);
|
|
}
|
|
}
|
|
} else { /* rank < 1.0 */
|
|
nafgt = classer->nafgt;
|
|
tab8 = makePixelSumTab8();
|
|
for (i = 0; i < n; i++) { /* all instances on this page */
|
|
pix1 = pixaGetPix(pixa1, i, L_CLONE);
|
|
numaGetIValue(nafg, i, &area1);
|
|
pix2 = pixaGetPix(pixa2, i, L_CLONE);
|
|
ptaGetPt(pta, i, &x1, &y1); /* use pta for this page */
|
|
nt = pixaGetCount(pixat); /* number of templates */
|
|
found = FALSE;
|
|
findcontext = findSimilarSizedTemplatesInit(classer, pix1);
|
|
while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
|
|
/* Find score for this template */
|
|
pix3 = pixaGetPix(pixat, iclass, L_CLONE);
|
|
numaGetIValue(nafgt, iclass, &area3);
|
|
pix4 = pixaGetPix(pixatd, iclass, L_CLONE);
|
|
ptaGetPt(ptact, iclass, &x2, &y2);
|
|
testval = pixRankHaustest(pix1, pix2, pix3, pix4,
|
|
x1 - x2, y1 - y2,
|
|
MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
|
|
area1, area3, rank, tab8);
|
|
pixDestroy(&pix3);
|
|
pixDestroy(&pix4);
|
|
if (testval == 1) { /* greedy match; take the first */
|
|
found = TRUE;
|
|
numaAddNumber(naclass, iclass);
|
|
numaAddNumber(napage, npages);
|
|
if (classer->keep_pixaa) {
|
|
pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
|
|
pix = pixaGetPix(pixas, i, L_CLONE);
|
|
pixaAddPix(pixa, pix, L_INSERT);
|
|
box = boxaGetBox(boxa, i, L_CLONE);
|
|
pixaAddBox(pixa, box, L_INSERT);
|
|
pixaDestroy(&pixa);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
findSimilarSizedTemplatesDestroy(&findcontext);
|
|
if (found == FALSE) { /* new class */
|
|
numaAddNumber(naclass, nt);
|
|
numaAddNumber(napage, npages);
|
|
pixa = pixaCreate(0);
|
|
pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */
|
|
pixaAddPix(pixa, pix, L_INSERT);
|
|
wt = pixGetWidth(pix);
|
|
ht = pixGetHeight(pix);
|
|
l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt);
|
|
box = boxaGetBox(boxa, i, L_CLONE);
|
|
pixaAddBox(pixa, box, L_INSERT);
|
|
pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */
|
|
ptaAddPt(ptact, x1, y1);
|
|
pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */
|
|
pixaAddPix(pixatd, pix2, L_INSERT); /* ditto */
|
|
numaAddNumber(nafgt, area1);
|
|
} else { /* don't save them */
|
|
pixDestroy(&pix1);
|
|
pixDestroy(&pix2);
|
|
}
|
|
}
|
|
LEPT_FREE(tab8);
|
|
}
|
|
classer->nclass = pixaGetCount(pixat);
|
|
|
|
numaDestroy(&nafg);
|
|
ptaDestroy(&pta);
|
|
pixaDestroy(&pixa1);
|
|
pixaDestroy(&pixa2);
|
|
selDestroy(&sel);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief pixHaustest()
|
|
*
|
|
* \param[in] pix1 new pix, not dilated
|
|
* \param[in] pix2 new pix, dilated
|
|
* \param[in] pix3 exemplar pix, not dilated
|
|
* \param[in] pix4 exemplar pix, dilated
|
|
* \param[in] delx x comp of centroid difference
|
|
* \param[in] dely y comp of centroid difference
|
|
* \param[in] maxdiffw max width difference of pix1 and pix2
|
|
* \param[in] maxdiffh max height difference of pix1 and pix2
|
|
* \return 0 FALSE) if no match, 1 (TRUE if the new
|
|
* pix is in the same class as the exemplar.
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* We check first that the two pix are roughly
|
|
* the same size. Only if they meet that criterion do
|
|
* we compare the bitmaps. The Hausdorff is a 2-way
|
|
* check. The centroid difference is used to align the two
|
|
* images to the nearest integer for each of the checks.
|
|
* These check that the dilated image of one contains
|
|
* ALL the pixels of the undilated image of the other.
|
|
* Checks are done in both direction. A single pixel not
|
|
* contained in either direction results in failure of the test.
|
|
* </pre>
|
|
*/
|
|
l_int32
|
|
pixHaustest(PIX *pix1,
|
|
PIX *pix2,
|
|
PIX *pix3,
|
|
PIX *pix4,
|
|
l_float32 delx, /* x(1) - x(3) */
|
|
l_float32 dely, /* y(1) - y(3) */
|
|
l_int32 maxdiffw,
|
|
l_int32 maxdiffh)
|
|
{
|
|
l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch;
|
|
PIX *pixt;
|
|
|
|
/* Eliminate possible matches based on size difference */
|
|
wi = pixGetWidth(pix1);
|
|
hi = pixGetHeight(pix1);
|
|
wt = pixGetWidth(pix3);
|
|
ht = pixGetHeight(pix3);
|
|
delw = L_ABS(wi - wt);
|
|
if (delw > maxdiffw)
|
|
return FALSE;
|
|
delh = L_ABS(hi - ht);
|
|
if (delh > maxdiffh)
|
|
return FALSE;
|
|
|
|
/* Round difference in centroid location to nearest integer;
|
|
* use this as a shift when doing the matching. */
|
|
if (delx >= 0)
|
|
idelx = (l_int32)(delx + 0.5);
|
|
else
|
|
idelx = (l_int32)(delx - 0.5);
|
|
if (dely >= 0)
|
|
idely = (l_int32)(dely + 0.5);
|
|
else
|
|
idely = (l_int32)(dely - 0.5);
|
|
|
|
/* Do 1-direction hausdorff, checking that every pixel in pix1
|
|
* is within a dilation distance of some pixel in pix3. Namely,
|
|
* that pix4 entirely covers pix1:
|
|
* pixt = pixSubtract(NULL, pix1, pix4), including shift
|
|
* where pixt has no ON pixels. */
|
|
pixt = pixCreateTemplate(pix1);
|
|
pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0);
|
|
pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC),
|
|
pix4, 0, 0);
|
|
pixZero(pixt, &boolmatch);
|
|
if (boolmatch == 0) {
|
|
pixDestroy(&pixt);
|
|
return FALSE;
|
|
}
|
|
|
|
/* Do 1-direction hausdorff, checking that every pixel in pix3
|
|
* is within a dilation distance of some pixel in pix1. Namely,
|
|
* that pix2 entirely covers pix3:
|
|
* pixSubtract(pixt, pix3, pix2), including shift
|
|
* where pixt has no ON pixels. */
|
|
pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0);
|
|
pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0);
|
|
pixZero(pixt, &boolmatch);
|
|
pixDestroy(&pixt);
|
|
return boolmatch;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief pixRankHaustest()
|
|
*
|
|
* \param[in] pix1 new pix, not dilated
|
|
* \param[in] pix2 new pix, dilated
|
|
* \param[in] pix3 exemplar pix, not dilated
|
|
* \param[in] pix4 exemplar pix, dilated
|
|
* \param[in] delx x comp of centroid difference
|
|
* \param[in] dely y comp of centroid difference
|
|
* \param[in] maxdiffw max width difference of pix1 and pix2
|
|
* \param[in] maxdiffh max height difference of pix1 and pix2
|
|
* \param[in] area1 fg pixels in pix1
|
|
* \param[in] area3 fg pixels in pix3
|
|
* \param[in] rank rank value of test, each way
|
|
* \param[in] tab8 table of pixel sums for byte
|
|
* \return 0 FALSE) if no match, 1 (TRUE if the new
|
|
* pix is in the same class as the exemplar.
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* We check first that the two pix are roughly
|
|
* the same size. Only if they meet that criterion do
|
|
* we compare the bitmaps. We convert the rank value to
|
|
* a number of pixels by multiplying the rank fraction by the number
|
|
* of pixels in the undilated image. The Hausdorff is a 2-way
|
|
* check. The centroid difference is used to align the two
|
|
* images to the nearest integer for each of the checks.
|
|
* The rank hausdorff checks that the dilated image of one
|
|
* contains the rank fraction of the pixels of the undilated
|
|
* image of the other. Checks are done in both direction.
|
|
* Failure of the test in either direction results in failure
|
|
* of the test.
|
|
* </pre>
|
|
*/
|
|
l_int32
|
|
pixRankHaustest(PIX *pix1,
|
|
PIX *pix2,
|
|
PIX *pix3,
|
|
PIX *pix4,
|
|
l_float32 delx, /* x(1) - x(3) */
|
|
l_float32 dely, /* y(1) - y(3) */
|
|
l_int32 maxdiffw,
|
|
l_int32 maxdiffh,
|
|
l_int32 area1,
|
|
l_int32 area3,
|
|
l_float32 rank,
|
|
l_int32 *tab8)
|
|
{
|
|
l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch;
|
|
l_int32 thresh1, thresh3;
|
|
PIX *pixt;
|
|
|
|
/* Eliminate possible matches based on size difference */
|
|
wi = pixGetWidth(pix1);
|
|
hi = pixGetHeight(pix1);
|
|
wt = pixGetWidth(pix3);
|
|
ht = pixGetHeight(pix3);
|
|
delw = L_ABS(wi - wt);
|
|
if (delw > maxdiffw)
|
|
return FALSE;
|
|
delh = L_ABS(hi - ht);
|
|
if (delh > maxdiffh)
|
|
return FALSE;
|
|
|
|
/* Upper bounds in remaining pixels for allowable match */
|
|
thresh1 = (l_int32)(area1 * (1. - rank) + 0.5);
|
|
thresh3 = (l_int32)(area3 * (1. - rank) + 0.5);
|
|
|
|
/* Round difference in centroid location to nearest integer;
|
|
* use this as a shift when doing the matching. */
|
|
if (delx >= 0)
|
|
idelx = (l_int32)(delx + 0.5);
|
|
else
|
|
idelx = (l_int32)(delx - 0.5);
|
|
if (dely >= 0)
|
|
idely = (l_int32)(dely + 0.5);
|
|
else
|
|
idely = (l_int32)(dely - 0.5);
|
|
|
|
/* Do 1-direction hausdorff, checking that every pixel in pix1
|
|
* is within a dilation distance of some pixel in pix3. Namely,
|
|
* that pix4 entirely covers pix1:
|
|
* pixt = pixSubtract(NULL, pix1, pix4), including shift
|
|
* where pixt has no ON pixels. */
|
|
pixt = pixCreateTemplate(pix1);
|
|
pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0);
|
|
pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC),
|
|
pix4, 0, 0);
|
|
pixThresholdPixelSum(pixt, thresh1, &boolmatch, tab8);
|
|
if (boolmatch == 1) { /* above thresh1 */
|
|
pixDestroy(&pixt);
|
|
return FALSE;
|
|
}
|
|
|
|
/* Do 1-direction hausdorff, checking that every pixel in pix3
|
|
* is within a dilation distance of some pixel in pix1. Namely,
|
|
* that pix2 entirely covers pix3:
|
|
* pixSubtract(pixt, pix3, pix2), including shift
|
|
* where pixt has no ON pixels. */
|
|
pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0);
|
|
pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0);
|
|
pixThresholdPixelSum(pixt, thresh3, &boolmatch, tab8);
|
|
pixDestroy(&pixt);
|
|
if (boolmatch == 1) /* above thresh3 */
|
|
return FALSE;
|
|
else
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
/*----------------------------------------------------------------------*
|
|
* Classification using windowed correlation score *
|
|
*----------------------------------------------------------------------*/
|
|
/*!
|
|
* \brief jbClassifyCorrelation()
|
|
*
|
|
* \param[in] jbclasser
|
|
* \param[in] boxa new components for classification
|
|
* \param[in] pixas new components for classification
|
|
* \return 0 if OK; 1 on error
|
|
*/
|
|
l_ok
|
|
jbClassifyCorrelation(JBCLASSER *classer,
|
|
BOXA *boxa,
|
|
PIXA *pixas)
|
|
{
|
|
l_int32 n, nt, i, iclass, wt, ht, found, area, area1, area2, npages,
|
|
overthreshold;
|
|
l_int32 *sumtab, *centtab;
|
|
l_uint32 *row, word;
|
|
l_float32 x1, y1, x2, y2, xsum, ysum;
|
|
l_float32 thresh, weight, threshold;
|
|
BOX *box;
|
|
NUMA *naclass, *napage;
|
|
NUMA *nafgt; /* fg area of all templates */
|
|
NUMA *naarea; /* w * h area of all templates */
|
|
JBFINDCTX *findcontext;
|
|
L_DNAHASH *dahash;
|
|
PIX *pix, *pix1, *pix2;
|
|
PIXA *pixa, *pixa1, *pixat;
|
|
PIXAA *pixaa;
|
|
PTA *pta, *ptac, *ptact;
|
|
l_int32 *pixcts; /* pixel counts of each pixa */
|
|
l_int32 **pixrowcts; /* row-by-row pixel counts of each pixa */
|
|
l_int32 x, y, rowcount, downcount, wpl;
|
|
l_uint8 byte;
|
|
|
|
PROCNAME("jbClassifyCorrelation");
|
|
|
|
if (!classer)
|
|
return ERROR_INT("classer not found", procName, 1);
|
|
if (!boxa)
|
|
return ERROR_INT("boxa not found", procName, 1);
|
|
if (!pixas)
|
|
return ERROR_INT("pixas not found", procName, 1);
|
|
|
|
npages = classer->npages;
|
|
|
|
/* Generate the bordered pixa, which contains all the the
|
|
* input components. This will not be saved. */
|
|
if ((n = pixaGetCount(pixas)) == 0) {
|
|
L_WARNING("pixas is empty\n", procName);
|
|
return 0;
|
|
}
|
|
pixa1 = pixaCreate(n);
|
|
for (i = 0; i < n; i++) {
|
|
pix = pixaGetPix(pixas, i, L_CLONE);
|
|
pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS,
|
|
JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0);
|
|
pixaAddPix(pixa1, pix1, L_INSERT);
|
|
pixDestroy(&pix);
|
|
}
|
|
|
|
/* Use these to save the class and page of each component. */
|
|
naclass = classer->naclass;
|
|
napage = classer->napage;
|
|
|
|
/* Get the number of fg pixels in each component. */
|
|
nafgt = classer->nafgt; /* holds fg areas of the templates */
|
|
sumtab = makePixelSumTab8();
|
|
|
|
pixcts = (l_int32 *)LEPT_CALLOC(n, sizeof(*pixcts));
|
|
pixrowcts = (l_int32 **)LEPT_CALLOC(n, sizeof(*pixrowcts));
|
|
centtab = makePixelCentroidTab8();
|
|
|
|
/* Count the "1" pixels in each row of the pix in pixa1; this
|
|
* allows pixCorrelationScoreThresholded to abort early if a match
|
|
* is impossible. This loop merges three calculations: the total
|
|
* number of "1" pixels, the number of "1" pixels in each row, and
|
|
* the centroid. The centroids are relative to the UL corner of
|
|
* each (bordered) pix. The pixrowcts[i][y] are the total number
|
|
* of fg pixels in pixa[i] below row y. */
|
|
pta = ptaCreate(n);
|
|
for (i = 0; i < n; i++) {
|
|
pix = pixaGetPix(pixa1, i, L_CLONE);
|
|
pixrowcts[i] = (l_int32 *)LEPT_CALLOC(pixGetHeight(pix),
|
|
sizeof(**pixrowcts));
|
|
xsum = 0;
|
|
ysum = 0;
|
|
wpl = pixGetWpl(pix);
|
|
row = pixGetData(pix) + (pixGetHeight(pix) - 1) * wpl;
|
|
downcount = 0;
|
|
for (y = pixGetHeight(pix) - 1; y >= 0; y--, row -= wpl) {
|
|
pixrowcts[i][y] = downcount;
|
|
rowcount = 0;
|
|
for (x = 0; x < wpl; x++) {
|
|
word = row[x];
|
|
byte = word & 0xff;
|
|
rowcount += sumtab[byte];
|
|
xsum += centtab[byte] + (x * 32 + 24) * sumtab[byte];
|
|
byte = (word >> 8) & 0xff;
|
|
rowcount += sumtab[byte];
|
|
xsum += centtab[byte] + (x * 32 + 16) * sumtab[byte];
|
|
byte = (word >> 16) & 0xff;
|
|
rowcount += sumtab[byte];
|
|
xsum += centtab[byte] + (x * 32 + 8) * sumtab[byte];
|
|
byte = (word >> 24) & 0xff;
|
|
rowcount += sumtab[byte];
|
|
xsum += centtab[byte] + x * 32 * sumtab[byte];
|
|
}
|
|
downcount += rowcount;
|
|
ysum += rowcount * y;
|
|
}
|
|
pixcts[i] = downcount;
|
|
if (downcount > 0) {
|
|
ptaAddPt(pta,
|
|
xsum / (l_float32)downcount, ysum / (l_float32)downcount);
|
|
} else { /* no pixels; shouldn't happen */
|
|
L_ERROR("downcount == 0 !\n", procName);
|
|
ptaAddPt(pta, pixGetWidth(pix) / 2, pixGetHeight(pix) / 2);
|
|
}
|
|
pixDestroy(&pix);
|
|
}
|
|
|
|
ptac = classer->ptac; /* holds centroids of components up to this page */
|
|
ptaJoin(ptac, pta, 0, -1); /* save centroids of all components */
|
|
ptact = classer->ptact; /* holds centroids of templates */
|
|
|
|
/* Store the unbordered pix in a pixaa, in a hierarchical
|
|
* set of arrays. There is one pixa for each class,
|
|
* and the pix in each pixa are all the instances found
|
|
* of that class. This is actually more than one would need
|
|
* for a jbig2 encoder, but there are two reasons to keep
|
|
* them around: (1) the set of instances for each class
|
|
* can be used to make an improved binary (or, better,
|
|
* a grayscale) template, rather than simply using the first
|
|
* one in the set; (2) we can investigate the failures
|
|
* of the classifier. This pixaa grows as we process
|
|
* successive pages. */
|
|
pixaa = classer->pixaa;
|
|
|
|
/* Array to store class exemplars */
|
|
pixat = classer->pixat;
|
|
|
|
/* Fill up the pixaa tree with the template exemplars as
|
|
* the first pix in each pixa. As we add each pix,
|
|
* we also add the associated box to the pixa.
|
|
* We also keep track of the centroid of each pix,
|
|
* and use the difference between centroids (of the
|
|
* pix with the exemplar we are checking it with)
|
|
* to align the two when checking that the correlation
|
|
* score exceeds a threshold. The correlation score
|
|
* is given by the square of the area of the AND
|
|
* between aligned instance and template, divided by
|
|
* the product of areas of each image. For identical
|
|
* template and instance, the score is 1.0.
|
|
* If the threshold is too small, non-equivalent instances
|
|
* will be placed in the same class; if too large, there will
|
|
* be an unnecessary division of classes representing the
|
|
* same character. The weightfactor adds in some of the
|
|
* difference (1.0 - thresh), depending on the heaviness
|
|
* of the template (measured as the fraction of fg pixels). */
|
|
thresh = classer->thresh;
|
|
weight = classer->weightfactor;
|
|
naarea = classer->naarea;
|
|
dahash = classer->dahash;
|
|
for (i = 0; i < n; i++) {
|
|
pix1 = pixaGetPix(pixa1, i, L_CLONE);
|
|
area1 = pixcts[i];
|
|
ptaGetPt(pta, i, &x1, &y1); /* centroid for this instance */
|
|
nt = pixaGetCount(pixat);
|
|
found = FALSE;
|
|
findcontext = findSimilarSizedTemplatesInit(classer, pix1);
|
|
while ( (iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) {
|
|
/* Get the template */
|
|
pix2 = pixaGetPix(pixat, iclass, L_CLONE);
|
|
numaGetIValue(nafgt, iclass, &area2);
|
|
ptaGetPt(ptact, iclass, &x2, &y2); /* template centroid */
|
|
|
|
/* Find threshold for this template */
|
|
if (weight > 0.0) {
|
|
numaGetIValue(naarea, iclass, &area);
|
|
threshold = thresh + (1. - thresh) * weight * area2 / area;
|
|
} else {
|
|
threshold = thresh;
|
|
}
|
|
|
|
/* Find score for this template */
|
|
overthreshold = pixCorrelationScoreThresholded(pix1, pix2,
|
|
area1, area2, x1 - x2, y1 - y2,
|
|
MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
|
|
sumtab, pixrowcts[i], threshold);
|
|
#if DEBUG_CORRELATION_SCORE
|
|
{
|
|
l_float32 score, testscore;
|
|
l_int32 count, testcount;
|
|
pixCorrelationScore(pix1, pix2, area1, area2, x1 - x2, y1 - y2,
|
|
MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT,
|
|
sumtab, &score);
|
|
|
|
pixCorrelationScoreSimple(pix1, pix2, area1, area2,
|
|
x1 - x2, y1 - y2, MAX_DIFF_WIDTH,
|
|
MAX_DIFF_HEIGHT, sumtab, &testscore);
|
|
count = (l_int32)rint(sqrt(score * area1 * area2));
|
|
testcount = (l_int32)rint(sqrt(testscore * area1 * area2));
|
|
if ((score >= threshold) != (testscore >= threshold)) {
|
|
fprintf(stderr, "Correlation score mismatch: "
|
|
"%d(%g,%d) vs %d(%g,%d) (%g)\n",
|
|
count, score, score >= threshold,
|
|
testcount, testscore, testscore >= threshold,
|
|
score - testscore);
|
|
}
|
|
|
|
if ((score >= threshold) != overthreshold) {
|
|
fprintf(stderr, "Mismatch between correlation/threshold "
|
|
"comparison: %g(%g,%d) >= %g(%g) vs %s\n",
|
|
score, score*area1*area2, count, threshold,
|
|
threshold*area1*area2,
|
|
(overthreshold ? "true" : "false"));
|
|
}
|
|
}
|
|
#endif /* DEBUG_CORRELATION_SCORE */
|
|
pixDestroy(&pix2);
|
|
|
|
if (overthreshold) { /* greedy match */
|
|
found = TRUE;
|
|
numaAddNumber(naclass, iclass);
|
|
numaAddNumber(napage, npages);
|
|
if (classer->keep_pixaa) {
|
|
/* We are keeping a record of all components */
|
|
pixa = pixaaGetPixa(pixaa, iclass, L_CLONE);
|
|
pix = pixaGetPix(pixas, i, L_CLONE);
|
|
pixaAddPix(pixa, pix, L_INSERT);
|
|
box = boxaGetBox(boxa, i, L_CLONE);
|
|
pixaAddBox(pixa, box, L_INSERT);
|
|
pixaDestroy(&pixa);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
findSimilarSizedTemplatesDestroy(&findcontext);
|
|
if (found == FALSE) { /* new class */
|
|
numaAddNumber(naclass, nt);
|
|
numaAddNumber(napage, npages);
|
|
pixa = pixaCreate(0);
|
|
pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */
|
|
pixaAddPix(pixa, pix, L_INSERT);
|
|
wt = pixGetWidth(pix);
|
|
ht = pixGetHeight(pix);
|
|
l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt);
|
|
box = boxaGetBox(boxa, i, L_CLONE);
|
|
pixaAddBox(pixa, box, L_INSERT);
|
|
pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */
|
|
ptaAddPt(ptact, x1, y1);
|
|
numaAddNumber(nafgt, area1);
|
|
pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */
|
|
area = (pixGetWidth(pix1) - 2 * JB_ADDED_PIXELS) *
|
|
(pixGetHeight(pix1) - 2 * JB_ADDED_PIXELS);
|
|
numaAddNumber(naarea, area);
|
|
} else { /* don't save it */
|
|
pixDestroy(&pix1);
|
|
}
|
|
}
|
|
classer->nclass = pixaGetCount(pixat);
|
|
|
|
LEPT_FREE(pixcts);
|
|
LEPT_FREE(centtab);
|
|
for (i = 0; i < n; i++) {
|
|
LEPT_FREE(pixrowcts[i]);
|
|
}
|
|
LEPT_FREE(pixrowcts);
|
|
|
|
LEPT_FREE(sumtab);
|
|
ptaDestroy(&pta);
|
|
pixaDestroy(&pixa1);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*----------------------------------------------------------------------*
|
|
* Determine the image components we start with *
|
|
*----------------------------------------------------------------------*/
|
|
/*!
|
|
* \brief jbGetComponents()
|
|
*
|
|
* \param[in] pixs 1 bpp
|
|
* \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
|
|
* \param[in] maxwidth of saved components; larger are discarded
|
|
* \param[in] maxheight of saved components; larger are discarded
|
|
* \param[out] ppboxa b.b. of component items
|
|
* \param[out] pppixa component items
|
|
* \return 0 if OK, 1 on error
|
|
*/
|
|
l_ok
|
|
jbGetComponents(PIX *pixs,
|
|
l_int32 components,
|
|
l_int32 maxwidth,
|
|
l_int32 maxheight,
|
|
BOXA **pboxad,
|
|
PIXA **ppixad)
|
|
{
|
|
l_int32 empty, res, redfactor;
|
|
BOXA *boxa;
|
|
PIX *pix1, *pix2, *pix3;
|
|
PIXA *pixa, *pixat;
|
|
|
|
PROCNAME("jbGetComponents");
|
|
|
|
if (!pboxad)
|
|
return ERROR_INT("&boxad not defined", procName, 1);
|
|
*pboxad = NULL;
|
|
if (!ppixad)
|
|
return ERROR_INT("&pixad not defined", procName, 1);
|
|
*ppixad = NULL;
|
|
if (!pixs)
|
|
return ERROR_INT("pixs not defined", procName, 1);
|
|
if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
|
|
components != JB_WORDS)
|
|
return ERROR_INT("invalid components", procName, 1);
|
|
|
|
pixZero(pixs, &empty);
|
|
if (empty) {
|
|
*pboxad = boxaCreate(0);
|
|
*ppixad = pixaCreate(0);
|
|
return 0;
|
|
}
|
|
|
|
/* If required, preprocess input pixs. The method for both
|
|
* characters and words is to generate a connected component
|
|
* mask over the units that we want to aggregrate, which are,
|
|
* in general, sets of related connected components in pixs.
|
|
* For characters, we want to include the dots with
|
|
* 'i', 'j' and '!', so we do a small vertical closing to
|
|
* generate the mask. For words, we make a mask over all
|
|
* characters in each word. This is a bit more tricky, because
|
|
* the spacing between words is difficult to predict a priori,
|
|
* and words can be typeset with variable spacing that can
|
|
* in some cases be barely larger than the space between
|
|
* characters. The first step is to generate the mask and
|
|
* identify each of its connected components. */
|
|
if (components == JB_CONN_COMPS) { /* no preprocessing */
|
|
boxa = pixConnComp(pixs, &pixa, 8);
|
|
} else if (components == JB_CHARACTERS) {
|
|
pix1 = pixMorphSequence(pixs, "c1.6", 0);
|
|
boxa = pixConnComp(pix1, &pixat, 8);
|
|
pixa = pixaClipToPix(pixat, pixs);
|
|
pixDestroy(&pix1);
|
|
pixaDestroy(&pixat);
|
|
} else { /* components == JB_WORDS */
|
|
|
|
/* Do the operations at about 150 ppi resolution.
|
|
* It is much faster at 75 ppi, but the results are
|
|
* more accurate at 150 ppi. This will segment the
|
|
* words in body text. It can be expected that relatively
|
|
* infrequent words in a larger font will be split. */
|
|
res = pixGetXRes(pixs);
|
|
if (res <= 200) {
|
|
redfactor = 1;
|
|
pix1 = pixClone(pixs);
|
|
} else if (res <= 400) {
|
|
redfactor = 2;
|
|
pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
|
|
} else {
|
|
redfactor = 4;
|
|
pix1 = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0);
|
|
}
|
|
|
|
/* Estimate the word mask, at approximately 150 ppi.
|
|
* This has both very large and very small components left in. */
|
|
pixWordMaskByDilation(pix1, &pix2, NULL, NULL);
|
|
|
|
/* Expand the optimally dilated word mask to full res. */
|
|
pix3 = pixExpandReplicate(pix2, redfactor);
|
|
|
|
/* Pull out the pixels in pixs corresponding to the mask
|
|
* components in pix3. Note that above we used threshold
|
|
* levels in the reduction of 1 to insure that the resulting
|
|
* mask fully covers the input pixs. The downside of using
|
|
* a threshold of 1 is that very close characters from adjacent
|
|
* lines can be joined. But with a level of 2 or greater,
|
|
* it is necessary to use a seedfill, followed by a pixOr():
|
|
* pixt4 = pixSeedfillBinary(NULL, pix3, pixs, 8);
|
|
* pixOr(pix3, pix3, pixt4);
|
|
* to insure that the mask coverage is complete over pixs. */
|
|
boxa = pixConnComp(pix3, &pixat, 4);
|
|
pixa = pixaClipToPix(pixat, pixs);
|
|
pixaDestroy(&pixat);
|
|
pixDestroy(&pix1);
|
|
pixDestroy(&pix2);
|
|
pixDestroy(&pix3);
|
|
}
|
|
|
|
/* Remove large components, and save the results. */
|
|
*ppixad = pixaSelectBySize(pixa, maxwidth, maxheight, L_SELECT_IF_BOTH,
|
|
L_SELECT_IF_LTE, NULL);
|
|
*pboxad = boxaSelectBySize(boxa, maxwidth, maxheight, L_SELECT_IF_BOTH,
|
|
L_SELECT_IF_LTE, NULL);
|
|
pixaDestroy(&pixa);
|
|
boxaDestroy(&boxa);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief pixWordMaskByDilation()
|
|
*
|
|
* \param[in] pixs 1 bpp; typ. at 75 to 150 ppi
|
|
* \param[out] pmask [optional] dilated word mask
|
|
* \param[out] psize [optional] size of good horizontal dilation
|
|
* \param[out] pixadb [optional] debug: pixa of intermediate steps
|
|
* \return 0 if OK, 1 on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) This gives an estimate of the word masks. See
|
|
* pixWordBoxesByDilation() for further filtering of the word boxes.
|
|
* (2) The resolution should be between 75 and 150 ppi, and the optimal
|
|
* dilation will be between 3 and 10.
|
|
* (3) A good size for dilating to get word masks is optionally returned.
|
|
* (4) Typically, the number of c.c. reduced with each successive
|
|
* dilation (stored in nadiff) decreases quickly to a minimum
|
|
* (where the characters in a word are joined), and then
|
|
* increases again as the smaller number of words are joined.
|
|
* For the typical case, you can then look for this minimum
|
|
* and dilate to get the word mask. However, there are many
|
|
* cases where the function is not so simple. For example, if the
|
|
* pix has been upscaled 2x, the nadiff function oscillates, with
|
|
* every other value being zero! And for some images it tails
|
|
* off without a clear minimum to indicate where to break.
|
|
* So a more simple and robust method is to find the dilation
|
|
* where the initial number of c.c. has been reduced by some
|
|
* fraction (we use a 70% reduction).
|
|
* </pre>
|
|
*/
|
|
l_ok
|
|
pixWordMaskByDilation(PIX *pixs,
|
|
PIX **ppixm,
|
|
l_int32 *psize,
|
|
PIXA *pixadb)
|
|
{
|
|
l_int32 i, n, ndil, maxdiff, diff, ibest;
|
|
l_int32 check, count, total, xres;
|
|
l_int32 ncc[13]; /* max dilation + 1 */
|
|
l_int32 *diffa;
|
|
BOXA *boxa;
|
|
NUMA *nacc, *nadiff;
|
|
PIX *pix1, *pix2;
|
|
|
|
PROCNAME("pixWordMaskByDilation");
|
|
|
|
if (ppixm) *ppixm = NULL;
|
|
if (psize) *psize = 0;
|
|
if (!pixs || pixGetDepth(pixs) != 1)
|
|
return ERROR_INT("pixs undefined or not 1 bpp", procName, 1);
|
|
if (!ppixm && !psize)
|
|
return ERROR_INT("no output requested", procName, 1);
|
|
|
|
/* Find a good dilation to create the word mask, by successively
|
|
* increasing dilation size and counting the connected components. */
|
|
pix1 = pixCopy(NULL, pixs);
|
|
ndil = 12; /* appropriate for 75 to 150 ppi */
|
|
nacc = numaCreate(ndil + 1);
|
|
nadiff = numaCreate(ndil + 1);
|
|
for (i = 0; i <= ndil; i++) {
|
|
if (i == 0) /* first one not dilated */
|
|
pix2 = pixCopy(NULL, pix1);
|
|
else /* successive dilation by sel_2h */
|
|
pix2 = pixMorphSequence(pix1, "d2.1", 0);
|
|
boxa = pixConnCompBB(pix2, 4);
|
|
ncc[i] = boxaGetCount(boxa);
|
|
numaAddNumber(nacc, ncc[i]);
|
|
if (i == 0) total = ncc[0];
|
|
if (i > 0) {
|
|
diff = ncc[i - 1] - ncc[i];
|
|
numaAddNumber(nadiff, diff);
|
|
}
|
|
pixDestroy(&pix1);
|
|
pix1 = pix2;
|
|
boxaDestroy(&boxa);
|
|
}
|
|
pixDestroy(&pix1);
|
|
|
|
/* Find the dilation at which the c.c. count has reduced
|
|
* to 30% of the initial value. Although 30% seems high,
|
|
* it seems better to use this but add one to ibest. */
|
|
diffa = numaGetIArray(nadiff);
|
|
n = numaGetCount(nadiff);
|
|
maxdiff = 0;
|
|
check = TRUE;
|
|
ibest = 2;
|
|
for (i = 1; i < n; i++) {
|
|
numaGetIValue(nacc, i, &count);
|
|
if (check && count < 0.3 * total) {
|
|
ibest = i + 1;
|
|
check = FALSE;
|
|
}
|
|
diff = diffa[i];
|
|
if (diff > maxdiff)
|
|
maxdiff = diff;
|
|
}
|
|
LEPT_FREE(diffa);
|
|
|
|
/* Add small compensation for higher resolution */
|
|
xres = pixGetXRes(pixs);
|
|
if (xres == 0) xres = 150;
|
|
if (xres > 110) ibest++;
|
|
if (ibest < 2) {
|
|
L_INFO("setting ibest to minimum allowed value of 2\n", procName);
|
|
ibest = 2;
|
|
}
|
|
|
|
if (pixadb) {
|
|
lept_mkdir("lept/jb");
|
|
{GPLOT *gplot;
|
|
NUMA *naseq;
|
|
PIX *pix3, *pix4;
|
|
L_INFO("Best dilation: %d\n", procName, L_MAX(3, ibest + 1));
|
|
naseq = numaMakeSequence(1, 1, numaGetCount(nacc));
|
|
gplot = gplotCreate("/tmp/lept/jb/numcc", GPLOT_PNG,
|
|
"Number of cc vs. horizontal dilation",
|
|
"Sel horiz", "Number of cc");
|
|
gplotAddPlot(gplot, naseq, nacc, GPLOT_LINES, "");
|
|
gplotMakeOutput(gplot);
|
|
gplotDestroy(&gplot);
|
|
pix3 = pixRead("/tmp/lept/jb/numcc.png");
|
|
pixaAddPix(pixadb, pix3, L_INSERT);
|
|
numaDestroy(&naseq);
|
|
naseq = numaMakeSequence(1, 1, numaGetCount(nadiff));
|
|
gplot = gplotCreate("/tmp/lept/jb/diffcc", GPLOT_PNG,
|
|
"Diff count of cc vs. horizontal dilation",
|
|
"Sel horiz", "Diff in cc");
|
|
gplotAddPlot(gplot, naseq, nadiff, GPLOT_LINES, "");
|
|
gplotMakeOutput(gplot);
|
|
gplotDestroy(&gplot);
|
|
pix3 = pixRead("/tmp/lept/jb/diffcc.png");
|
|
pixaAddPix(pixadb, pix3, L_INSERT);
|
|
numaDestroy(&naseq);
|
|
pix3 = pixCloseBrick(NULL, pixs, ibest + 1, 1);
|
|
pix4 = pixScaleToSize(pix3, 600, 0);
|
|
pixaAddPix(pixadb, pix4, L_INSERT);
|
|
pixDestroy(&pix3);
|
|
}
|
|
}
|
|
|
|
if (psize) *psize = ibest + 1;
|
|
if (ppixm)
|
|
*ppixm = pixCloseBrick(NULL, pixs, ibest + 1, 1);
|
|
|
|
numaDestroy(&nacc);
|
|
numaDestroy(&nadiff);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief pixWordBoxesByDilation()
|
|
*
|
|
* \param[in] pixs 1 bpp; typ. 75 - 200 ppi
|
|
* \param[in] minwidth saved components; smaller are discarded
|
|
* \param[in] minheight saved components; smaller are discarded
|
|
* \param[in] maxwidth saved components; larger are discarded
|
|
* \param[in] maxheight saved components; larger are discarded
|
|
* \param[out] pboxa of dilated word mask
|
|
* \param[out] psize [optional] size of good horizontal dilation
|
|
* \param[out] pixadb [optional] debug: pixa of intermediate steps
|
|
* \return 0 if OK, 1 on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) Returns a pruned set of word boxes.
|
|
* (2) See pixWordMaskByDilation().
|
|
* </pre>
|
|
*/
|
|
l_ok
|
|
pixWordBoxesByDilation(PIX *pixs,
|
|
l_int32 minwidth,
|
|
l_int32 minheight,
|
|
l_int32 maxwidth,
|
|
l_int32 maxheight,
|
|
BOXA **pboxa,
|
|
l_int32 *psize,
|
|
PIXA *pixadb)
|
|
{
|
|
BOXA *boxa1, *boxa2;
|
|
PIX *pix1, *pix2;
|
|
|
|
PROCNAME("pixWordBoxesByDilation");
|
|
|
|
if (psize) *psize = 0;
|
|
if (!pixs || pixGetDepth(pixs) != 1)
|
|
return ERROR_INT("pixs undefined or not 1 bpp", procName, 1);
|
|
if (!pboxa)
|
|
return ERROR_INT("&boxa not defined", procName, 1);
|
|
*pboxa = NULL;
|
|
|
|
/* Make a first estimate of the word mask */
|
|
if (pixWordMaskByDilation(pixs, &pix1, psize, pixadb))
|
|
return ERROR_INT("pixWordMaskByDilation() failed", procName, 1);
|
|
|
|
/* Prune the word mask. Get the bounding boxes of the words.
|
|
* Remove the small ones, which can be due to punctuation
|
|
* that was not joined to a word. Also remove the large ones,
|
|
* which are not likely to be words. */
|
|
boxa1 = pixConnComp(pix1, NULL, 8);
|
|
boxa2 = boxaSelectBySize(boxa1, minwidth, minheight, L_SELECT_IF_BOTH,
|
|
L_SELECT_IF_GTE, NULL);
|
|
*pboxa = boxaSelectBySize(boxa2, maxwidth, maxheight, L_SELECT_IF_BOTH,
|
|
L_SELECT_IF_LTE, NULL);
|
|
if (pixadb) {
|
|
pix2 = pixUnpackBinary(pixs, 32, 1);
|
|
pixRenderBoxaArb(pix2, boxa1, 2, 255, 0, 0);
|
|
pixaAddPix(pixadb, pix2, L_INSERT);
|
|
pix2 = pixUnpackBinary(pixs, 32, 1);
|
|
pixRenderBoxaArb(pix2, boxa2, 2, 0, 255, 0);
|
|
pixaAddPix(pixadb, pix2, L_INSERT);
|
|
}
|
|
boxaDestroy(&boxa1);
|
|
boxaDestroy(&boxa2);
|
|
pixDestroy(&pix1);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*----------------------------------------------------------------------*
|
|
* Build grayscale composites (templates) *
|
|
*----------------------------------------------------------------------*/
|
|
/*!
|
|
* \brief jbAccumulateComposites()
|
|
*
|
|
* \param[in] pixaa one pixa for each class
|
|
* \param[out] ppna number of samples used to build each composite
|
|
* \param[out] pptat centroids of bordered composites
|
|
* \return pixad accumulated sum of samples in each class, or NULL on error
|
|
*
|
|
*/
|
|
PIXA *
|
|
jbAccumulateComposites(PIXAA *pixaa,
|
|
NUMA **pna,
|
|
PTA **pptat)
|
|
{
|
|
l_int32 n, nt, i, j, d, minw, maxw, minh, maxh, xdiff, ydiff;
|
|
l_float32 x, y, xave, yave;
|
|
NUMA *na;
|
|
PIX *pix, *pixt1, *pixt2, *pixsum;
|
|
PIXA *pixa, *pixad;
|
|
PTA *ptat, *pta;
|
|
|
|
PROCNAME("jbAccumulateComposites");
|
|
|
|
if (!pptat)
|
|
return (PIXA *)ERROR_PTR("&ptat not defined", procName, NULL);
|
|
*pptat = NULL;
|
|
if (!pna)
|
|
return (PIXA *)ERROR_PTR("&na not defined", procName, NULL);
|
|
*pna = NULL;
|
|
if (!pixaa)
|
|
return (PIXA *)ERROR_PTR("pixaa not defined", procName, NULL);
|
|
|
|
n = pixaaGetCount(pixaa, NULL);
|
|
if ((ptat = ptaCreate(n)) == NULL)
|
|
return (PIXA *)ERROR_PTR("ptat not made", procName, NULL);
|
|
*pptat = ptat;
|
|
pixad = pixaCreate(n);
|
|
na = numaCreate(n);
|
|
*pna = na;
|
|
|
|
for (i = 0; i < n; i++) {
|
|
pixa = pixaaGetPixa(pixaa, i, L_CLONE);
|
|
nt = pixaGetCount(pixa);
|
|
numaAddNumber(na, nt);
|
|
if (nt == 0) {
|
|
L_WARNING("empty pixa found!\n", procName);
|
|
pixaDestroy(&pixa);
|
|
continue;
|
|
}
|
|
pixaSizeRange(pixa, &minw, &minh, &maxw, &maxh);
|
|
pix = pixaGetPix(pixa, 0, L_CLONE);
|
|
d = pixGetDepth(pix);
|
|
pixDestroy(&pix);
|
|
pixt1 = pixCreate(maxw, maxh, d);
|
|
pixsum = pixInitAccumulate(maxw, maxh, 0);
|
|
pta = pixaCentroids(pixa);
|
|
|
|
/* Find the average value of the centroids ... */
|
|
xave = yave = 0;
|
|
for (j = 0; j < nt; j++) {
|
|
ptaGetPt(pta, j, &x, &y);
|
|
xave += x;
|
|
yave += y;
|
|
}
|
|
xave = xave / (l_float32)nt;
|
|
yave = yave / (l_float32)nt;
|
|
|
|
/* and place all centroids at their average value */
|
|
for (j = 0; j < nt; j++) {
|
|
pixt2 = pixaGetPix(pixa, j, L_CLONE);
|
|
ptaGetPt(pta, j, &x, &y);
|
|
xdiff = (l_int32)(x - xave);
|
|
ydiff = (l_int32)(y - yave);
|
|
pixClearAll(pixt1);
|
|
pixRasterop(pixt1, xdiff, ydiff, maxw, maxh, PIX_SRC,
|
|
pixt2, 0, 0);
|
|
pixAccumulate(pixsum, pixt1, L_ARITH_ADD);
|
|
pixDestroy(&pixt2);
|
|
}
|
|
pixaAddPix(pixad, pixsum, L_INSERT);
|
|
ptaAddPt(ptat, xave, yave);
|
|
|
|
pixaDestroy(&pixa);
|
|
pixDestroy(&pixt1);
|
|
ptaDestroy(&pta);
|
|
}
|
|
|
|
return pixad;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief jbTemplatesFromComposites()
|
|
*
|
|
* \param[in] pixac one pix of composites for each class
|
|
* \param[in] na number of samples used for each class composite
|
|
* \return pixad 8 bpp templates for each class, or NULL on error
|
|
*
|
|
*/
|
|
PIXA *
|
|
jbTemplatesFromComposites(PIXA *pixac,
|
|
NUMA *na)
|
|
{
|
|
l_int32 n, i;
|
|
l_float32 nt; /* number of samples in the composite; always an integer */
|
|
l_float32 factor;
|
|
PIX *pixsum; /* accumulated composite */
|
|
PIX *pixd;
|
|
PIXA *pixad;
|
|
|
|
PROCNAME("jbTemplatesFromComposites");
|
|
|
|
if (!pixac)
|
|
return (PIXA *)ERROR_PTR("pixac not defined", procName, NULL);
|
|
if (!na)
|
|
return (PIXA *)ERROR_PTR("na not defined", procName, NULL);
|
|
|
|
n = pixaGetCount(pixac);
|
|
pixad = pixaCreate(n);
|
|
for (i = 0; i < n; i++) {
|
|
pixsum = pixaGetPix(pixac, i, L_COPY); /* changed internally */
|
|
numaGetFValue(na, i, &nt);
|
|
factor = 255. / nt;
|
|
pixMultConstAccumulate(pixsum, factor, 0); /* changes pixsum */
|
|
pixd = pixFinalAccumulate(pixsum, 0, 8);
|
|
pixaAddPix(pixad, pixd, L_INSERT);
|
|
pixDestroy(&pixsum);
|
|
}
|
|
|
|
return pixad;
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------*
|
|
* jbig2 utility routines *
|
|
*----------------------------------------------------------------------*/
|
|
/*!
|
|
* \brief jbClasserCreate()
|
|
*
|
|
* \param[in] method JB_RANKHAUS, JB_CORRELATION
|
|
* \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
|
|
* \return jbclasser, or NULL on error
|
|
*/
|
|
JBCLASSER *
|
|
jbClasserCreate(l_int32 method,
|
|
l_int32 components)
|
|
{
|
|
JBCLASSER *classer;
|
|
|
|
PROCNAME("jbClasserCreate");
|
|
|
|
if (method != JB_RANKHAUS && method != JB_CORRELATION)
|
|
return (JBCLASSER *)ERROR_PTR("invalid method", procName, NULL);
|
|
if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
|
|
components != JB_WORDS)
|
|
return (JBCLASSER *)ERROR_PTR("invalid component", procName, NULL);
|
|
|
|
classer = (JBCLASSER *)LEPT_CALLOC(1, sizeof(JBCLASSER));
|
|
classer->method = method;
|
|
classer->components = components;
|
|
classer->nacomps = numaCreate(0);
|
|
classer->pixaa = pixaaCreate(0);
|
|
classer->pixat = pixaCreate(0);
|
|
classer->pixatd = pixaCreate(0);
|
|
classer->nafgt = numaCreate(0);
|
|
classer->naarea = numaCreate(0);
|
|
classer->ptac = ptaCreate(0);
|
|
classer->ptact = ptaCreate(0);
|
|
classer->naclass = numaCreate(0);
|
|
classer->napage = numaCreate(0);
|
|
classer->ptaul = ptaCreate(0);
|
|
return classer;
|
|
}
|
|
|
|
|
|
/*
|
|
* \brief jbClasserDestroy()
|
|
*
|
|
* \param[in,out] pclasser will be set to null before returning
|
|
* \return void
|
|
*/
|
|
void
|
|
jbClasserDestroy(JBCLASSER **pclasser)
|
|
{
|
|
JBCLASSER *classer;
|
|
|
|
if (!pclasser)
|
|
return;
|
|
if ((classer = *pclasser) == NULL)
|
|
return;
|
|
|
|
sarrayDestroy(&classer->safiles);
|
|
numaDestroy(&classer->nacomps);
|
|
pixaaDestroy(&classer->pixaa);
|
|
pixaDestroy(&classer->pixat);
|
|
pixaDestroy(&classer->pixatd);
|
|
l_dnaHashDestroy(&classer->dahash);
|
|
numaDestroy(&classer->nafgt);
|
|
numaDestroy(&classer->naarea);
|
|
ptaDestroy(&classer->ptac);
|
|
ptaDestroy(&classer->ptact);
|
|
numaDestroy(&classer->naclass);
|
|
numaDestroy(&classer->napage);
|
|
ptaDestroy(&classer->ptaul);
|
|
ptaDestroy(&classer->ptall);
|
|
LEPT_FREE(classer);
|
|
*pclasser = NULL;
|
|
return;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief jbDataSave()
|
|
*
|
|
* \param[in] jbclasser
|
|
* \param[in] latticew cell width used to store each connected
|
|
* component in the composite
|
|
* \param[in] latticeh ditto for cell height
|
|
* \return jbdata, or NULL on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) This routine stores the jbig2-type data required for
|
|
* generating a lossy jbig2 version of the image.
|
|
* It can be losslessly written to (and read from) two files.
|
|
* (2) It generates and stores the mosaic of templates.
|
|
* (3) It clones the Numa and Pta arrays, so these must all
|
|
* be destroyed by the caller.
|
|
* (4) Input 0 to use the default values for latticew and/or latticeh,
|
|
* </pre>
|
|
*/
|
|
JBDATA *
|
|
jbDataSave(JBCLASSER *classer)
|
|
{
|
|
l_int32 maxw, maxh;
|
|
JBDATA *data;
|
|
PIX *pix;
|
|
|
|
PROCNAME("jbDataSave");
|
|
|
|
if (!classer)
|
|
return (JBDATA *)ERROR_PTR("classer not defined", procName, NULL);
|
|
|
|
/* Write the templates into an array. */
|
|
pixaSizeRange(classer->pixat, NULL, NULL, &maxw, &maxh);
|
|
pix = pixaDisplayOnLattice(classer->pixat, maxw + 1, maxh + 1,
|
|
NULL, NULL);
|
|
if (!pix)
|
|
return (JBDATA *)ERROR_PTR("data not made", procName, NULL);
|
|
|
|
data = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA));
|
|
data->pix = pix;
|
|
data->npages = classer->npages;
|
|
data->w = classer->w;
|
|
data->h = classer->h;
|
|
data->nclass = classer->nclass;
|
|
data->latticew = maxw + 1;
|
|
data->latticeh = maxh + 1;
|
|
data->naclass = numaClone(classer->naclass);
|
|
data->napage = numaClone(classer->napage);
|
|
data->ptaul = ptaClone(classer->ptaul);
|
|
return data;
|
|
}
|
|
|
|
|
|
/*
|
|
* \brief jbDataDestroy()
|
|
*
|
|
* \param[in,out] pdata will be set to null before returning
|
|
* \return void
|
|
*/
|
|
void
|
|
jbDataDestroy(JBDATA **pdata)
|
|
{
|
|
JBDATA *data;
|
|
|
|
if (!pdata)
|
|
return;
|
|
if ((data = *pdata) == NULL)
|
|
return;
|
|
|
|
pixDestroy(&data->pix);
|
|
numaDestroy(&data->naclass);
|
|
numaDestroy(&data->napage);
|
|
ptaDestroy(&data->ptaul);
|
|
LEPT_FREE(data);
|
|
*pdata = NULL;
|
|
return;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief jbDataWrite()
|
|
*
|
|
* \param[in] rootname for output files; everything but the extension
|
|
* \param[in] jbdata
|
|
* \return 0 if OK, 1 on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) Serialization function that writes data in jbdata to file.
|
|
* </pre>
|
|
*/
|
|
l_ok
|
|
jbDataWrite(const char *rootout,
|
|
JBDATA *jbdata)
|
|
{
|
|
char buf[L_BUF_SIZE];
|
|
l_int32 w, h, nclass, npages, cellw, cellh, ncomp, i, x, y, iclass, ipage;
|
|
NUMA *naclass, *napage;
|
|
PTA *ptaul;
|
|
PIX *pixt;
|
|
FILE *fp;
|
|
|
|
PROCNAME("jbDataWrite");
|
|
|
|
if (!rootout)
|
|
return ERROR_INT("no rootout", procName, 1);
|
|
if (!jbdata)
|
|
return ERROR_INT("no jbdata", procName, 1);
|
|
|
|
npages = jbdata->npages;
|
|
w = jbdata->w;
|
|
h = jbdata->h;
|
|
pixt = jbdata->pix;
|
|
nclass = jbdata->nclass;
|
|
cellw = jbdata->latticew;
|
|
cellh = jbdata->latticeh;
|
|
naclass = jbdata->naclass;
|
|
napage = jbdata->napage;
|
|
ptaul = jbdata->ptaul;
|
|
|
|
snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_TEMPLATE_EXT);
|
|
pixWrite(buf, pixt, IFF_PNG);
|
|
|
|
snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_DATA_EXT);
|
|
if ((fp = fopenWriteStream(buf, "wb")) == NULL)
|
|
return ERROR_INT("stream not opened", procName, 1);
|
|
ncomp = ptaGetCount(ptaul);
|
|
fprintf(fp, "jb data file\n");
|
|
fprintf(fp, "num pages = %d\n", npages);
|
|
fprintf(fp, "page size: w = %d, h = %d\n", w, h);
|
|
fprintf(fp, "num components = %d\n", ncomp);
|
|
fprintf(fp, "num classes = %d\n", nclass);
|
|
fprintf(fp, "template lattice size: w = %d, h = %d\n", cellw, cellh);
|
|
for (i = 0; i < ncomp; i++) {
|
|
numaGetIValue(napage, i, &ipage);
|
|
numaGetIValue(naclass, i, &iclass);
|
|
ptaGetIPt(ptaul, i, &x, &y);
|
|
fprintf(fp, "%d %d %d %d\n", ipage, iclass, x, y);
|
|
}
|
|
fclose(fp);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief jbDataRead()
|
|
*
|
|
* \param[in] rootname for template and data files
|
|
* \return jbdata, or NULL on error
|
|
*/
|
|
JBDATA *
|
|
jbDataRead(const char *rootname)
|
|
{
|
|
char fname[L_BUF_SIZE];
|
|
char *linestr;
|
|
l_uint8 *data;
|
|
l_int32 nsa, i, w, h, cellw, cellh, x, y, iclass, ipage;
|
|
l_int32 npages, nclass, ncomp, ninit;
|
|
size_t size;
|
|
JBDATA *jbdata;
|
|
NUMA *naclass, *napage;
|
|
PIX *pixs;
|
|
PTA *ptaul;
|
|
SARRAY *sa;
|
|
|
|
PROCNAME("jbDataRead");
|
|
|
|
if (!rootname)
|
|
return (JBDATA *)ERROR_PTR("rootname not defined", procName, NULL);
|
|
|
|
snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_TEMPLATE_EXT);
|
|
if ((pixs = pixRead(fname)) == NULL)
|
|
return (JBDATA *)ERROR_PTR("pix not read", procName, NULL);
|
|
|
|
snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_DATA_EXT);
|
|
if ((data = l_binaryRead(fname, &size)) == NULL) {
|
|
pixDestroy(&pixs);
|
|
return (JBDATA *)ERROR_PTR("data not read", procName, NULL);
|
|
}
|
|
|
|
if ((sa = sarrayCreateLinesFromString((char *)data, 0)) == NULL) {
|
|
pixDestroy(&pixs);
|
|
LEPT_FREE(data);
|
|
return (JBDATA *)ERROR_PTR("sa not made", procName, NULL);
|
|
}
|
|
nsa = sarrayGetCount(sa); /* number of cc + 6 */
|
|
linestr = sarrayGetString(sa, 0, L_NOCOPY);
|
|
if (strcmp(linestr, "jb data file") != 0) {
|
|
pixDestroy(&pixs);
|
|
LEPT_FREE(data);
|
|
sarrayDestroy(&sa);
|
|
return (JBDATA *)ERROR_PTR("invalid jb data file", procName, NULL);
|
|
}
|
|
linestr = sarrayGetString(sa, 1, L_NOCOPY);
|
|
sscanf(linestr, "num pages = %d", &npages);
|
|
linestr = sarrayGetString(sa, 2, L_NOCOPY);
|
|
sscanf(linestr, "page size: w = %d, h = %d", &w, &h);
|
|
linestr = sarrayGetString(sa, 3, L_NOCOPY);
|
|
sscanf(linestr, "num components = %d", &ncomp);
|
|
linestr = sarrayGetString(sa, 4, L_NOCOPY);
|
|
sscanf(linestr, "num classes = %d\n", &nclass);
|
|
linestr = sarrayGetString(sa, 5, L_NOCOPY);
|
|
sscanf(linestr, "template lattice size: w = %d, h = %d\n", &cellw, &cellh);
|
|
|
|
#if 1
|
|
fprintf(stderr, "num pages = %d\n", npages);
|
|
fprintf(stderr, "page size: w = %d, h = %d\n", w, h);
|
|
fprintf(stderr, "num components = %d\n", ncomp);
|
|
fprintf(stderr, "num classes = %d\n", nclass);
|
|
fprintf(stderr, "template lattice size: w = %d, h = %d\n", cellw, cellh);
|
|
#endif
|
|
|
|
ninit = ncomp;
|
|
if (ncomp > 1000000) { /* fuzz protection */
|
|
L_WARNING("ncomp > 1M\n", procName);
|
|
ninit = 1000000;
|
|
}
|
|
naclass = numaCreate(ninit);
|
|
napage = numaCreate(ninit);
|
|
ptaul = ptaCreate(ninit);
|
|
for (i = 6; i < nsa; i++) {
|
|
linestr = sarrayGetString(sa, i, L_NOCOPY);
|
|
sscanf(linestr, "%d %d %d %d\n", &ipage, &iclass, &x, &y);
|
|
numaAddNumber(napage, ipage);
|
|
numaAddNumber(naclass, iclass);
|
|
ptaAddPt(ptaul, x, y);
|
|
}
|
|
|
|
jbdata = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA));
|
|
jbdata->pix = pixs;
|
|
jbdata->npages = npages;
|
|
jbdata->w = w;
|
|
jbdata->h = h;
|
|
jbdata->nclass = nclass;
|
|
jbdata->latticew = cellw;
|
|
jbdata->latticeh = cellh;
|
|
jbdata->naclass = naclass;
|
|
jbdata->napage = napage;
|
|
jbdata->ptaul = ptaul;
|
|
|
|
LEPT_FREE(data);
|
|
sarrayDestroy(&sa);
|
|
return jbdata;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief jbDataRender()
|
|
*
|
|
* \param[in] jbdata
|
|
* \param[in] debugflag if TRUE, writes into 2 bpp pix and adds
|
|
* component outlines in color
|
|
* \return pixa reconstruction of original images, using templates or
|
|
* NULL on error
|
|
*/
|
|
PIXA *
|
|
jbDataRender(JBDATA *data,
|
|
l_int32 debugflag)
|
|
{
|
|
l_int32 i, w, h, cellw, cellh, x, y, iclass, ipage;
|
|
l_int32 npages, nclass, ncomp, wp, hp;
|
|
BOX *box;
|
|
NUMA *naclass, *napage;
|
|
PIX *pixt, *pixt2, *pix, *pixd;
|
|
PIXA *pixat; /* pixa of templates */
|
|
PIXA *pixad; /* pixa of output images */
|
|
PIXCMAP *cmap;
|
|
PTA *ptaul;
|
|
|
|
PROCNAME("jbDataRender");
|
|
|
|
if (!data)
|
|
return (PIXA *)ERROR_PTR("data not defined", procName, NULL);
|
|
|
|
npages = data->npages;
|
|
w = data->w;
|
|
h = data->h;
|
|
pixt = data->pix;
|
|
nclass = data->nclass;
|
|
cellw = data->latticew;
|
|
cellh = data->latticeh;
|
|
naclass = data->naclass;
|
|
napage = data->napage;
|
|
ptaul = data->ptaul;
|
|
ncomp = numaGetCount(naclass);
|
|
|
|
/* Reconstruct the original set of images from the templates
|
|
* and the data associated with each component. First,
|
|
* generate the output pixa as a set of empty pix. */
|
|
if ((pixad = pixaCreate(npages)) == NULL)
|
|
return (PIXA *)ERROR_PTR("pixad not made", procName, NULL);
|
|
for (i = 0; i < npages; i++) {
|
|
if (debugflag == FALSE) {
|
|
pix = pixCreate(w, h, 1);
|
|
} else {
|
|
pix = pixCreate(w, h, 2);
|
|
cmap = pixcmapCreate(2);
|
|
pixcmapAddColor(cmap, 255, 255, 255);
|
|
pixcmapAddColor(cmap, 0, 0, 0);
|
|
pixcmapAddColor(cmap, 255, 0, 0); /* for box outlines */
|
|
pixSetColormap(pix, cmap);
|
|
}
|
|
pixaAddPix(pixad, pix, L_INSERT);
|
|
}
|
|
|
|
/* Put the class templates into a pixa. */
|
|
if ((pixat = pixaCreateFromPix(pixt, nclass, cellw, cellh)) == NULL) {
|
|
pixaDestroy(&pixad);
|
|
return (PIXA *)ERROR_PTR("pixat not made", procName, NULL);
|
|
}
|
|
|
|
/* Place each component in the right location on its page. */
|
|
for (i = 0; i < ncomp; i++) {
|
|
numaGetIValue(napage, i, &ipage);
|
|
numaGetIValue(naclass, i, &iclass);
|
|
pix = pixaGetPix(pixat, iclass, L_CLONE); /* the template */
|
|
wp = pixGetWidth(pix);
|
|
hp = pixGetHeight(pix);
|
|
ptaGetIPt(ptaul, i, &x, &y);
|
|
pixd = pixaGetPix(pixad, ipage, L_CLONE); /* the output page */
|
|
if (debugflag == FALSE) {
|
|
pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pix, 0, 0);
|
|
} else {
|
|
pixt2 = pixConvert1To2Cmap(pix);
|
|
pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pixt2, 0, 0);
|
|
box = boxCreate(x, y, wp, hp);
|
|
pixRenderBoxArb(pixd, box, 1, 255, 0, 0);
|
|
pixDestroy(&pixt2);
|
|
boxDestroy(&box);
|
|
}
|
|
pixDestroy(&pix); /* the clone only */
|
|
pixDestroy(&pixd); /* the clone only */
|
|
}
|
|
|
|
pixaDestroy(&pixat);
|
|
return pixad;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief jbGetULCorners()
|
|
*
|
|
* \param[in] jbclasser
|
|
* \param[in] pixs full res image
|
|
* \param[in] boxa of c.c. bounding rectangles for this page
|
|
* \return 0 if OK, 1 on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) This computes the ptaul field, which has the global UL corners,
|
|
* adjusted for each specific component, so that each component
|
|
* can be replaced by the template for its class and have the
|
|
* centroid in the template in the same position as the
|
|
* centroid of the original connected component. It is important
|
|
* that this be done properly to avoid a wavy baseline in the
|
|
* result.
|
|
* (2) The array fields ptac and ptact give the centroids of
|
|
* those components relative to the UL corner of each component.
|
|
* Here, we compute the difference in each component, round to
|
|
* nearest integer, and correct the box->x and box->y by
|
|
* the appropriate integral difference.
|
|
* (3) The templates and stored instances are all bordered.
|
|
* </pre>
|
|
*/
|
|
l_ok
|
|
jbGetULCorners(JBCLASSER *classer,
|
|
PIX *pixs,
|
|
BOXA *boxa)
|
|
{
|
|
l_int32 i, baseindex, index, n, iclass, idelx, idely, x, y, dx, dy;
|
|
l_int32 *sumtab;
|
|
l_float32 x1, x2, y1, y2, delx, dely;
|
|
BOX *box;
|
|
NUMA *naclass;
|
|
PIX *pixt;
|
|
PTA *ptac, *ptact, *ptaul;
|
|
|
|
PROCNAME("jbGetULCorners");
|
|
|
|
if (!classer)
|
|
return ERROR_INT("classer not defined", procName, 1);
|
|
if (!pixs)
|
|
return ERROR_INT("pixs not defined", procName, 1);
|
|
if (!boxa)
|
|
return ERROR_INT("boxa not defined", procName, 1);
|
|
|
|
n = boxaGetCount(boxa);
|
|
ptaul = classer->ptaul;
|
|
naclass = classer->naclass;
|
|
ptac = classer->ptac;
|
|
ptact = classer->ptact;
|
|
baseindex = classer->baseindex; /* num components before this page */
|
|
sumtab = makePixelSumTab8();
|
|
for (i = 0; i < n; i++) {
|
|
index = baseindex + i;
|
|
ptaGetPt(ptac, index, &x1, &y1);
|
|
numaGetIValue(naclass, index, &iclass);
|
|
ptaGetPt(ptact, iclass, &x2, &y2);
|
|
delx = x2 - x1;
|
|
dely = y2 - y1;
|
|
if (delx >= 0)
|
|
idelx = (l_int32)(delx + 0.5);
|
|
else
|
|
idelx = (l_int32)(delx - 0.5);
|
|
if (dely >= 0)
|
|
idely = (l_int32)(dely + 0.5);
|
|
else
|
|
idely = (l_int32)(dely - 0.5);
|
|
if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL) {
|
|
LEPT_FREE(sumtab);
|
|
return ERROR_INT("box not found", procName, 1);
|
|
}
|
|
boxGetGeometry(box, &x, &y, NULL, NULL);
|
|
|
|
/* Get final increments dx and dy for best alignment */
|
|
pixt = pixaGetPix(classer->pixat, iclass, L_CLONE);
|
|
finalPositioningForAlignment(pixs, x, y, idelx, idely,
|
|
pixt, sumtab, &dx, &dy);
|
|
/* if (i % 20 == 0)
|
|
fprintf(stderr, "dx = %d, dy = %d\n", dx, dy); */
|
|
ptaAddPt(ptaul, x - idelx + dx, y - idely + dy);
|
|
boxDestroy(&box);
|
|
pixDestroy(&pixt);
|
|
}
|
|
|
|
LEPT_FREE(sumtab);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief jbGetLLCorners()
|
|
*
|
|
* \param[in] jbclasser
|
|
* \return 0 if OK, 1 on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) This computes the ptall field, which has the global LL corners,
|
|
* adjusted for each specific component, so that each component
|
|
* can be replaced by the template for its class and have the
|
|
* centroid in the template in the same position as the
|
|
* centroid of the original connected component. It is important
|
|
* that this be done properly to avoid a wavy baseline in the result.
|
|
* (2) It is computed here from the corresponding UL corners, where
|
|
* the input templates and stored instances are all bordered.
|
|
* This should be done after all pages have been processed.
|
|
* (3) For proper substitution, the templates whose LL corners are
|
|
* placed in these locations must be UN-bordered.
|
|
* This is available for a realistic jbig2 encoder, which would
|
|
* (1) encode each template without a border, and (2) encode
|
|
* the position using the LL corner (rather than the UL
|
|
* corner) because the difference between y-values
|
|
* of successive instances is typically close to zero.
|
|
* </pre>
|
|
*/
|
|
l_ok
|
|
jbGetLLCorners(JBCLASSER *classer)
|
|
{
|
|
l_int32 i, iclass, n, x1, y1, h;
|
|
NUMA *naclass;
|
|
PIX *pix;
|
|
PIXA *pixat;
|
|
PTA *ptaul, *ptall;
|
|
|
|
PROCNAME("jbGetLLCorners");
|
|
|
|
if (!classer)
|
|
return ERROR_INT("classer not defined", procName, 1);
|
|
|
|
ptaul = classer->ptaul;
|
|
naclass = classer->naclass;
|
|
pixat = classer->pixat;
|
|
|
|
ptaDestroy(&classer->ptall);
|
|
n = ptaGetCount(ptaul);
|
|
ptall = ptaCreate(n);
|
|
classer->ptall = ptall;
|
|
|
|
/* If the templates were bordered, we would add h - 1 to the UL
|
|
* corner y-value. However, because the templates to be used
|
|
* here have their borders removed, and the borders are
|
|
* JB_ADDED_PIXELS on each side, we add h - 1 - 2 * JB_ADDED_PIXELS
|
|
* to the UL corner y-value. */
|
|
for (i = 0; i < n; i++) {
|
|
ptaGetIPt(ptaul, i, &x1, &y1);
|
|
numaGetIValue(naclass, i, &iclass);
|
|
pix = pixaGetPix(pixat, iclass, L_CLONE);
|
|
h = pixGetHeight(pix);
|
|
ptaAddPt(ptall, x1, y1 + h - 1 - 2 * JB_ADDED_PIXELS);
|
|
pixDestroy(&pix);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*----------------------------------------------------------------------*
|
|
* Static helpers *
|
|
*----------------------------------------------------------------------*/
|
|
/* When looking for similar matches we check templates whose size is +/- 2 in
|
|
* each direction. This involves 25 possible sizes. This array contains the
|
|
* offsets for each of those positions in a spiral pattern. There are 25 pairs
|
|
* of numbers in this array: even positions are x values. */
|
|
static int two_by_two_walk[50] = {
|
|
0, 0,
|
|
0, 1,
|
|
-1, 0,
|
|
0, -1,
|
|
1, 0,
|
|
-1, 1,
|
|
1, 1,
|
|
-1, -1,
|
|
1, -1,
|
|
0, -2,
|
|
2, 0,
|
|
0, 2,
|
|
-2, 0,
|
|
-1, -2,
|
|
1, -2,
|
|
2, -1,
|
|
2, 1,
|
|
1, 2,
|
|
-1, 2,
|
|
-2, 1,
|
|
-2, -1,
|
|
-2, -2,
|
|
2, -2,
|
|
2, 2,
|
|
-2, 2};
|
|
|
|
|
|
/*!
|
|
* \brief findSimilarSizedTemplatesInit()
|
|
*
|
|
* \param[in] classer
|
|
* \param[in] pixs instance to be matched
|
|
* \return Allocated context to be used with findSimilar*
|
|
*/
|
|
static JBFINDCTX *
|
|
findSimilarSizedTemplatesInit(JBCLASSER *classer,
|
|
PIX *pixs)
|
|
{
|
|
JBFINDCTX *state;
|
|
|
|
state = (JBFINDCTX *)LEPT_CALLOC(1, sizeof(JBFINDCTX));
|
|
state->w = pixGetWidth(pixs) - 2 * JB_ADDED_PIXELS;
|
|
state->h = pixGetHeight(pixs) - 2 * JB_ADDED_PIXELS;
|
|
state->classer = classer;
|
|
return state;
|
|
}
|
|
|
|
|
|
static void
|
|
findSimilarSizedTemplatesDestroy(JBFINDCTX **pstate)
|
|
{
|
|
JBFINDCTX *state;
|
|
|
|
PROCNAME("findSimilarSizedTemplatesDestroy");
|
|
|
|
if (pstate == NULL) {
|
|
L_WARNING("ptr address is null\n", procName);
|
|
return;
|
|
}
|
|
if ((state = *pstate) == NULL)
|
|
return;
|
|
|
|
l_dnaDestroy(&state->dna);
|
|
LEPT_FREE(state);
|
|
*pstate = NULL;
|
|
return;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief findSimilarSizedTemplatesNext()
|
|
*
|
|
* \param[in] state from findSimilarSizedTemplatesInit
|
|
* \return next template number, or -1 when finished
|
|
*
|
|
* We have a dna hash table that maps template area to a list of template
|
|
* numbers with that area. We wish to find similar sized templates,
|
|
* so we first look for templates with the same width and height, and
|
|
* then with width + 1, etc. This walk is guided by the
|
|
* two_by_two_walk array, above.
|
|
*
|
|
* We don't want to have to collect the whole list of templates first,
|
|
* because we hope to find a well-matching template quickly. So we
|
|
* keep the context for this walk in an explictit state structure,
|
|
* and this function acts like a generator.
|
|
*/
|
|
static l_int32
|
|
findSimilarSizedTemplatesNext(JBFINDCTX *state)
|
|
{
|
|
l_int32 desiredh, desiredw, size, templ;
|
|
PIX *pixt;
|
|
|
|
while(1) { /* Continue the walk over step 'i' */
|
|
if (state->i >= 25) { /* all done; didn't find a good match */
|
|
return -1;
|
|
}
|
|
|
|
desiredw = state->w + two_by_two_walk[2 * state->i];
|
|
desiredh = state->h + two_by_two_walk[2 * state->i + 1];
|
|
if (desiredh < 1 || desiredw < 1) { /* invalid size */
|
|
state->i++;
|
|
continue;
|
|
}
|
|
|
|
if (!state->dna) {
|
|
/* We have yet to start walking the array for the step 'i' */
|
|
state->dna = l_dnaHashGetDna(state->classer->dahash,
|
|
(l_uint64)desiredh * desiredw, L_CLONE);
|
|
if (!state->dna) { /* nothing there */
|
|
state->i++;
|
|
continue;
|
|
}
|
|
|
|
state->n = 0; /* OK, we got a dna. */
|
|
}
|
|
|
|
/* Continue working on this dna */
|
|
size = l_dnaGetCount(state->dna);
|
|
for ( ; state->n < size; ) {
|
|
templ = (l_int32)(state->dna->array[state->n++] + 0.5);
|
|
pixt = pixaGetPix(state->classer->pixat, templ, L_CLONE);
|
|
if (pixGetWidth(pixt) - 2 * JB_ADDED_PIXELS == desiredw &&
|
|
pixGetHeight(pixt) - 2 * JB_ADDED_PIXELS == desiredh) {
|
|
pixDestroy(&pixt);
|
|
return templ;
|
|
}
|
|
pixDestroy(&pixt);
|
|
}
|
|
|
|
/* Exhausted the dna (no match found); take another step and
|
|
* try again. */
|
|
state->i++;
|
|
l_dnaDestroy(&state->dna);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief finalPositioningForAlignment()
|
|
*
|
|
* \param[in] pixs input page image
|
|
* \param[in] x, y location of UL corner of bb of component in pixs
|
|
* \param[in] idelx, idely compensation to match centroids of component
|
|
* and template
|
|
* \param[in] pixt template, with JB_ADDED_PIXELS of padding
|
|
* on all sides
|
|
* \param[in] sumtab for summing fg pixels in an image
|
|
* \param[in] pdx, pdy return delta on position for best match; each
|
|
* one is in the set {-1, 0, 1}
|
|
* \return 0 if OK, 1 on error
|
|
*
|
|
*/
|
|
static l_int32
|
|
finalPositioningForAlignment(PIX *pixs,
|
|
l_int32 x,
|
|
l_int32 y,
|
|
l_int32 idelx,
|
|
l_int32 idely,
|
|
PIX *pixt,
|
|
l_int32 *sumtab,
|
|
l_int32 *pdx,
|
|
l_int32 *pdy)
|
|
{
|
|
l_int32 w, h, i, j, minx, miny, count, mincount;
|
|
PIX *pixi; /* clipped from source pixs */
|
|
PIX *pixr; /* temporary storage */
|
|
BOX *box;
|
|
|
|
PROCNAME("finalPositioningForAlignment");
|
|
|
|
if (!pixs)
|
|
return ERROR_INT("pixs not defined", procName, 1);
|
|
if (!pixt)
|
|
return ERROR_INT("pixt not defined", procName, 1);
|
|
if (!pdx || !pdy)
|
|
return ERROR_INT("&dx and &dy not both defined", procName, 1);
|
|
if (!sumtab)
|
|
return ERROR_INT("sumtab not defined", procName, 1);
|
|
*pdx = *pdy = 0;
|
|
|
|
/* Use JB_ADDED_PIXELS pixels padding on each side */
|
|
pixGetDimensions(pixt, &w, &h, NULL);
|
|
box = boxCreate(x - idelx - JB_ADDED_PIXELS,
|
|
y - idely - JB_ADDED_PIXELS, w, h);
|
|
pixi = pixClipRectangle(pixs, box, NULL);
|
|
boxDestroy(&box);
|
|
if (!pixi)
|
|
return ERROR_INT("pixi not made", procName, 1);
|
|
|
|
pixr = pixCreate(pixGetWidth(pixi), pixGetHeight(pixi), 1);
|
|
mincount = 0x7fffffff;
|
|
for (i = -1; i <= 1; i++) {
|
|
for (j = -1; j <= 1; j++) {
|
|
pixCopy(pixr, pixi);
|
|
pixRasterop(pixr, j, i, w, h, PIX_SRC ^ PIX_DST, pixt, 0, 0);
|
|
pixCountPixels(pixr, &count, sumtab);
|
|
if (count < mincount) {
|
|
minx = j;
|
|
miny = i;
|
|
mincount = count;
|
|
}
|
|
}
|
|
}
|
|
pixDestroy(&pixi);
|
|
pixDestroy(&pixr);
|
|
|
|
*pdx = minx;
|
|
*pdy = miny;
|
|
return 0;
|
|
}
|