/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *====================================================================*/ /*! * \file recogtrain.c *

 *
 *      Training on labeled data
 *         l_int32             recogTrainLabeled()
 *         PIX                *recogProcessLabeled()
 *         l_int32             recogAddSample()
 *         PIX                *recogModifyTemplate()
 *         l_int32             recogAverageSamples()
 *         l_int32             pixaAccumulateSamples()
 *         l_int32             recogTrainingFinished()
 *         static l_int32      recogTemplatesAreOK()
 *         PIXA               *recogFilterPixaBySize()
 *         PIXAA              *recogSortPixaByClass()
 *         l_int32             recogRemoveOutliers1()
 *         PIXA               *pixaRemoveOutliers1()
 *         l_int32             recogRemoveOutliers2()
 *         PIXA               *pixaRemoveOutliers2()
 *
 *      Training on unlabeled data
 *         L_RECOG             recogTrainFromBoot()
 *
 *      Padding the digit training set
 *         l_int32             recogPadDigitTrainingSet()
 *         l_int32             recogIsPaddingNeeded()
 *         static SARRAY      *recogAddMissingClassStrings()
 *         PIXA               *recogAddDigitPadTemplates()
 *         static l_int32      recogCharsetAvailable()
 *
 *      Making a boot digit recognizer
 *         L_RECOG            *recogMakeBootDigitRecog()
 *         PIXA               *recogMakeBootDigitTemplates()
 *
 *      Debugging
 *         l_int32             recogShowContent()
 *         l_int32             recogDebugAverages()
 *         l_int32             recogShowAverageTemplates()
 *         static PIX         *pixDisplayOutliers()
 *         PIX                *recogDisplayOutlier()
 *         PIX                *recogShowMatchesInRange()
 *         PIX                *recogShowMatch()
 *
 *  These abbreviations are for the type of template to be used:
 *    * SI (for the scanned images)
 *    * WNL (for width-normalized lines, formed by first skeletonizing
 *           the scanned images, and then dilating to a fixed width)
 *  These abbreviations are for the type of recognizer:
 *    * BAR (book-adapted recognizer; the best type; can do identification
 *           with unscaled images and separation of touching characters)
 *    * BSR (bootstrap recognizer; used if more labeled templates are
 *           required for a BAR, either for finding more templates from
 *           the book, or making a hybrid BAR/BSR)
 *
 *  The recog struct typically holds two versions of the input templates
 *  (e.g. from a pixa) that were used to generate it.  One version is
 *  the unscaled input templates.  The other version is the one that
 *  will be used by the recog to identify unlabeled data.  That version
 *  depends on the input parameters when the recog is created.  The choices
 *  for the latter version, and their suggested use, are:
 *  (1) unscaled SI -- typical for BAR, generated from book images
 *  (2) unscaled WNL -- ditto
 *  (3) scaled SI -- typical for recognizers containing template
 *      images from sources other than the book to be recognized
 *  (4) scaled WNL -- ditto
 *  For cases (3) and (4), we recommend scaling to fixed height; e.g.,
 *  scalew = 0, scaleh = 40.
 *  When using WNL, we recommend using a width of 5 in the template
 *  and 4 in the unlabeled data.
 *  It appears that better results for a BAR are usually obtained using
 *  SI than WNL, but more experimentation is needed.
 *
 *  This utility is designed to build recognizers that are specifically
 *  adapted from a large amount of material, such as a book.  These
 *  use labeled templates taken from the material, and not scaled.
 *  In addition, two special recognizers are useful:
 *  (1) Bootstrap recognizer (BSR).  This uses height-scaled templates,
 *      that have been extended with several repetitions in one of two ways:
 *      (a) anisotropic width scaling (for either SI or WNL)
 *      (b) iterative erosions/dilations (for SI).
 *  (2) Outlier removal.  This uses height scaled templates.  It can be
 *      implemented without using templates that are aligned averages of all
 *      templates in a class.
 *
 *  Recognizers are inexpensive to generate, for example, from a pixa
 *  of labeled templates.  The general process of building a BAR is
 *  to start with labeled templates, e.g., in a pixa, make a BAR, and
 *  analyze new samples from the book to augment the BAR until it has
 *  enough samples for each character class.  Along the way, samples
 *  from a BSR may be added for help in training.  If not enough samples
 *  are available for the BAR, it can finally be augmented with BSR
 *  samples, in which case the resulting hybrid BAR/BSR recognizer
 *  must work on scaled images.
 *
 *  Here are the steps in doing recog training:
 *  A. Generate a BAR from any existing labeled templates
 *    (1) Create a recog and add the templates, using recogAddSample().
 *        This stores the unscaled templates.
 *        [Note: this can be done in one step if the labeled templates are put
 *         into a pixa:
 *           L_Recog *rec = recogCreateFromPixa(pixa, ...);  ]
 *    (2) Call recogTrainingFinished() to generate the (sometimes modified)
 *        templates to be used for correlation.
 *    (3) Optionally, remove outliers.
 *    If there are sufficient samples in the classes, we're done. Otherwise,
 *  B. Try to get more samples from the book to pad the BAR.
 *     (1) Save the unscaled, labeled templates from the BAR.
 *     (2) Supplement the BAR with bootstrap templates to make a hybrid BAR/BSR.
 *     (3) Do recognition on more unlabeled images, scaled to a fixed height
 *     (4) Add the unscaled, labeled images to the saved set.
 *     (5) Optionally, remove outliers.
 *     If there are sufficient samples in the classes, we're done. Otherwise,
 *  C. For classes without a sufficient number of templates, we can
 *     supplement the BAR with templates from a BSR (a hybrid BAR/BSR),
 *     and do recognition scaled to a fixed height.
 *
 *  Here are several methods that can be used for identifying outliers:
 *  (1) Compute average templates for each class and remove a candidate
 *      that is poorly correlated with the average.  This is the most
 *      simple method.  recogRemoveOutliers1() uses this, supplemented with
 *      a second threshold and a target number of templates to be saved.
 *  (2) Compute average templates for each class and remove a candidate
 *      that is more highly correlated with the average of some other class.
 *      This does not require setting a threshold for the correlation.
 *      recogRemoveOutliers2() uses this method, supplemented with a minimum
 *      correlation score.
 *  (3) For each candidate, find the average correlation with other
 *      members of its class, and remove those that have a relatively
 *      low average correlation.  This is similar to (1), gives comparable
 *      results and because it does not use average templates, it requires
 *      a bit more computation.
 *

*/ #include #include "allheaders.h" /* Static functions */ static l_int32 recogTemplatesAreOK(L_RECOG *recog, l_int32 minsize, l_float32 minfract, l_int32 *pok); static SARRAY *recogAddMissingClassStrings(L_RECOG *recog); static l_int32 recogCharsetAvailable(l_int32 type); static PIX *pixDisplayOutliers(PIXA *pixas, NUMA *nas); static PIX *recogDisplayOutlier(L_RECOG *recog, l_int32 iclass, l_int32 jsamp, l_int32 maxclass, l_float32 maxscore); /* Default parameters that are used in recogTemplatesAreOK() and * in outlier removal functions, and that use template set size * to decide if the set of templates (before outliers are removed) * is valid. Values are set to accept most sets of sample templates. */ static const l_int32 DefaultMinSetSize = 1; /* minimum number of samples for a valid class */ static const l_float32 DefaultMinSetFract = 0.4; /* minimum fraction of classes required for a valid recog */ /* Defaults in pixaRemoveOutliers1() and pixaRemoveOutliers2() */ static const l_float32 DefaultMinScore = 0.75; /* keep everything above */ static const l_int32 DefaultMinTarget = 3; /* to be kept if possible */ static const l_float32 LowerScoreThreshold = 0.5; /* templates can be * kept down to this score to if needed to retain the * desired minimum number of templates */ /*------------------------------------------------------------------------* * Training * *------------------------------------------------------------------------*/ /*! * \brief recogTrainLabeled() * * \param[in] recog in training mode * \param[in] pixs if depth > 1, will be thresholded to 1 bpp * \param[in] box [optional] cropping box * \param[in] text [optional] if null, use text field in pix * \param[in] debug 1 to display images of samples not captured * \return 0 if OK, 1 on error * *

 * Notes:
 *      (1) Training is restricted to the addition of a single
 *          character in an arbitrary (e.g., UTF8) charset
 *      (2) If box != null, it should represent the location in %pixs
 *          of the character image.
 *

*/
l_ok
recogTrainLabeled(L_RECOG  *recog,
                  PIX      *pixs,
                  BOX      *box,
                  char     *text,
                  l_int32   debug)
{
char    *label;
l_int32  ret;
PIX     *pix = NULL;

    PROCNAME("recogTrainLabeled");

    if (!recog)
        return ERROR_INT("recog not defined", procName, 1);
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);

        /* Prepare the sample to be added. This step also acts
         * as a filter, and can invalidate pixs as a template. */
    ret = recogProcessLabeled(recog, pixs, box, text, &pix);
    if (ret) {
        pixDestroy(&pix);
            /* %text is optional, so it may be null here.  Report the
             * label that was actually in play, and never hand a null
             * pointer to the "%s" conversion. */
        label = (text && text[0] != '\0') ? text : pixGetText(pixs);
        L_WARNING("failure to get sample '%s' for training\n", procName,
                  label ? label : "");
        return 1;
    }

        /* Propagate a failure to store the sample instead of
         * reporting success for a sample that was not added. */
    ret = recogAddSample(recog, pix, debug);
    pixDestroy(&pix);
    return (ret) ? 1 : 0;
}


/*!
 * \brief   recogProcessLabeled()
 *
 * \param[in]    recog     in training mode
 * \param[in]    pixs      if depth > 1, will be thresholded to 1 bpp
 * \param[in]    box       [optional] cropping box
 * \param[in]    text      [optional] if null, use text field in pix
 * \param[out]   ppix      addr of pix, 1 bpp, labeled
 * \return  0 if OK, 1 on error
 *
 *

 * Notes:
 *      (1) This crops and binarizes the input image, generating a pix
 *          of one character where the charval is inserted into the pix.
 *

*/
l_ok
recogProcessLabeled(L_RECOG  *recog,
                    PIX      *pixs,
                    BOX      *box,
                    char     *text,
                    PIX     **ppix)
{
char    *label;  /* borrowed from %text or from pixs; do not free */
l_int32  nruns;
NUMA    *nacols;
PIX     *pixclip, *pixbin, *pixseed, *pixfg;

    PROCNAME("recogProcessLabeled");

    if (!ppix)
        return ERROR_INT("&pix not defined", procName, 1);
    *ppix = NULL;
    if (!recog)
        return ERROR_INT("recog not defined", procName, 1);
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);

        /* Choose the label to store with the output image: a non-empty
         * %text argument wins; otherwise fall back to a non-empty text
         * field in pixs. */
    if (text && text[0] != '\0') {
        label = text;
    } else if (pixs->text && pixs->text[0] != '\0') {
        label = pixs->text;
    } else {
        L_ERROR("no text: %d\n", procName, recog->num_samples);
        return 1;
    }

        /* Crop to the box if one is given, then binarize if needed */
    pixclip = (box) ? pixClipRectangle(pixs, box, NULL) : pixClone(pixs);
    pixbin = (pixGetDepth(pixclip) > 1)
                 ? pixConvertTo1(pixclip, recog->threshold)
                 : pixClone(pixclip);
    pixDestroy(&pixclip);

        /* Remove isolated noise.  The seed holds what survives a
         * vertical opening of size 5; filling from that seed, clipped
         * to the binarized image, restores only the components that
         * contain seed pixels. */
    pixseed = pixMorphSequence(pixbin, "o1.5", 0);
    pixSeedfillBinary(pixseed, pixseed, pixbin, 8);
    pixDestroy(&pixbin);

        /* Clip to foreground */
    pixClipToForeground(pixseed, &pixfg, NULL);
    pixDestroy(&pixseed);
    if (!pixfg)
        return ERROR_INT("pix4 is empty", procName, 1);

        /* More than one run of nonzero columns means the components
         * are horizontally separated; such a sample is skipped. */
    nacols = pixCountByColumn(pixfg, NULL);
    numaCountNonzeroRuns(nacols, &nruns);
    numaDestroy(&nacols);
    if (nruns > 1) {
        L_WARNING("found %d sets of horiz separated c.c.; skipping\n",
                  procName, nruns);
        pixDestroy(&pixfg);
        return 1;
    }

    pixSetText(pixfg, label);
    *ppix = pixfg;
    return 0;
}


/*!
 * \brief   recogAddSample()
 *
 * \param[in]    recog
 * \param[in]    pix        a single character, 1 bpp
 * \param[in]    debug
 * \return  0 if OK, 1 on error
 *
 *

 * Notes:
 *      (1) The pix is 1 bpp, with the character string label embedded.
 *      (2) The pixaa_u array of the recog is initialized to accept
 *          up to 256 different classes.  When training is finished,
 *          the arrays are truncated to the actual number of classes.
 *          To pad an existing recog from the boot recognizers, training
 *          is started again; if samples from a new class are added,
 *          the pixaa_u array is extended by adding a pixa to hold them.
 *

*/
l_ok
recogAddSample(L_RECOG  *recog,
               PIX      *pix,
               l_int32   debug)
{
char    *text;
l_int32  npa, charint, index;
PIXA    *pixa1;
PIXAA   *paa;

    PROCNAME("recogAddSample");

    if (!recog)
        return ERROR_INT("recog not defined", procName, 1);
    if (!pix || pixGetDepth(pix) != 1)
        return ERROR_INT("pix not defined or not 1 bpp\n", procName, 1);
    if (recog->train_done)
        return ERROR_INT("not added: training has been completed",
                         procName, 1);
    paa = recog->pixaa_u;

        /* Make sure the character is in the set.  A pix without a
         * label is rejected first, so that a null pointer is never
         * handed to the "%s" conversion in the message below. */
    text = pixGetText(pix);
    if (!text)
        return ERROR_INT("pix has no text label", procName, 1);
    if (l_convertCharstrToInt(text, &charint) == 1) {
        L_ERROR("invalid text: %s\n", procName, text);
        return 1;
    }

        /* Determine the class array index.  Check if the class
         * already exists, and if not, add it. */
    if (recogGetClassIndex(recog, charint, text, &index) == 1) {
            /* New class must be added */
        npa = pixaaGetCount(paa, NULL);
        if (index > npa) {
            L_ERROR("oops: bad index %d > npa %d!!\n", procName, index, npa);
            return 1;
        }
        if (index == npa) {  /* paa needs to be extended */
            L_INFO("Adding new class and pixa: index = %d, text = %s\n",
                   procName, index, text);
            pixa1 = pixaCreate(10);
            pixaaAddPixa(paa, pixa1, L_INSERT);
        }
    }
    if (debug) {
        L_INFO("Identified text label: %s\n", procName, text);
        L_INFO("Identified: charint = %d, index = %d\n",
               procName, charint, index);
    }

        /* Insert the unscaled character image into the right pixa.
         * (Unscaled images are required to split touching characters.)
         * The sample is counted only after it has been stored, so that
         * num_samples stays in step with the contents of pixaa_u. */
    if (pixaaAddPix(paa, index, pix, NULL, L_COPY) != 0)
        return ERROR_INT("sample not added to pixaa", procName, 1);
    recog->num_samples++;
    return 0;
}


/*!
* \brief recogModifyTemplate() * * \param[in] recog * \param[in] pixs 1 bpp, to be optionally scaled and turned into * strokes of fixed width * \return pixd modified pix if OK, NULL on error */ PIX * recogModifyTemplate(L_RECOG *recog, PIX *pixs) { l_int32 w, h, empty; PIX *pix1, *pix2; PROCNAME("recogModifyTemplate"); if (!recog) return (PIX *)ERROR_PTR("recog not defined", procName, NULL); if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); /* Scale first */ pixGetDimensions(pixs, &w, &h, NULL); if ((recog->scalew == 0 || recog->scalew == w) && (recog->scaleh == 0 || recog->scaleh == h)) { /* no scaling */ pix1 = pixCopy(NULL, pixs); } else { pix1 = pixScaleToSize(pixs, recog->scalew, recog->scaleh); } if (!pix1) return (PIX *)ERROR_PTR("pix1 not made", procName, NULL); /* Then optionally convert to lines */ if (recog->linew <= 0) { pix2 = pixClone(pix1); } else { pix2 = pixSetStrokeWidth(pix1, recog->linew, 1, 8); } pixDestroy(&pix1); if (!pix2) return (PIX *)ERROR_PTR("pix2 not made", procName, NULL); /* Make sure we still have some pixels */ pixZero(pix2, &empty); if (empty) { pixDestroy(&pix2); return (PIX *)ERROR_PTR("modified template has no pixels", procName, NULL); } return pix2; } /*! * \brief recogAverageSamples() * * \param[in] precog addr of existing recog; may be destroyed * \param[in] debug * \return 0 on success, 1 on failure * *

 * Notes:
 *      (1) This is only called in two situations:
 *          (a) When splitting characters using either the DID method
 *              recogDecode() or the greedy splitter
 *              recogCorrelationBestRow()
 *          (b) By a special recognizer that is used to remove outliers.
 *          Both unscaled and scaled inputs are averaged.
 *      (2) If the data in any class is nonexistent (no samples), or
 *          very bad (no fg pixels in the average), or if the ratio
 *          of max/min average unscaled class template heights is
 *          greater than max_ht_ratio, this destroys the recog.
 *          The caller must check the return value of the recog.
 *      (3) Set debug = 1 to view the resulting templates and their centroids.
 *

*/
l_int32
recogAverageSamples(L_RECOG  **precog,
                    l_int32    debug)
{
l_int32    iclass, nclasses, nsamp, fgarea, xoff, yoff, failed;
l_float32  xcen, ycen, htratio;
BOX       *fgbox;
PIXA      *pixac;
PIX       *pixacc, *pixbin, *pixcrop;
PTA       *ptac;
L_RECOG   *recog;

    PROCNAME("recogAverageSamples");

    if (!precog)
        return ERROR_INT("&recog not defined", procName, 1);
    recog = *precog;
    if (recog == NULL)
        return ERROR_INT("recog not defined", procName, 1);

        /* Averages already exist: nothing to compute, but still
         * show them if requested. */
    if (recog->ave_done) {
        if (debug)
            recogShowAverageTemplates(recog);
        return 0;
    }

        /* Remove any previous averaging data */
    nclasses = recog->setsize;
    pixaDestroy(&recog->pixa_u);
    ptaDestroy(&recog->pta_u);
    numaDestroy(&recog->nasum_u);
    recog->pixa_u = pixaCreate(nclasses);
    recog->pta_u = ptaCreate(nclasses);
    recog->nasum_u = numaCreate(nclasses);

    pixaDestroy(&recog->pixa);
    ptaDestroy(&recog->pta);
    numaDestroy(&recog->nasum);
    recog->pixa = pixaCreate(nclasses);
    recog->pta = ptaCreate(nclasses);
    recog->nasum = numaCreate(nclasses);

        /* Unscaled bitmaps: compute averaged bitmap, centroid, and fg area.
         * The 8 bpp averaged template that comes back from the accumulator
         * is thresholded to 1 bpp and then cropped to its foreground,
         * because the correlator assumes cropped templates and will
         * return a zero value if the width or height of the two images
         * differs by several pixels.  Cropping shifts the origin by
         * the box corner, so the centroid is corrected by that offset.
         * The loop stops at the first bad class. */
    failed = FALSE;
    for (iclass = 0; iclass < nclasses && !failed; iclass++) {
        pixac = pixaaGetPixa(recog->pixaa_u, iclass, L_CLONE);
        ptac = ptaaGetPta(recog->ptaa_u, iclass, L_CLONE);
        nsamp = pixaGetCount(pixac);
        nsamp = L_MIN(nsamp, 256);  /* we only use the first 256 */
        if (nsamp == 0) {  /* no information for this class */
            L_ERROR("no samples in class %d\n", procName, iclass);
            failed = TRUE;
        } else {
            pixaAccumulateSamples(pixac, ptac, &pixacc, &xcen, &ycen);
            pixbin = pixThresholdToBinary(pixacc, L_MAX(1, nsamp / 2));
            pixInvert(pixbin, pixbin);
            pixClipToForeground(pixbin, &pixcrop, &fgbox);
            if (fgbox) {
                boxGetGeometry(fgbox, &xoff, &yoff, NULL, NULL);
                pixaAddPix(recog->pixa_u, pixcrop, L_INSERT);
                ptaAddPt(recog->pta_u, xcen - xoff, ycen - yoff);
                pixCountPixels(pixcrop, &fgarea, recog->sumtab);
                numaAddNumber(recog->nasum_u, fgarea);  /* foreground */
                boxDestroy(&fgbox);
            } else {
                L_ERROR("no fg pixels in average for uclass %d\n",
                        procName, iclass);
                failed = TRUE;
            }
            pixDestroy(&pixacc);
            pixDestroy(&pixbin);
        }
        pixaDestroy(&pixac);
        ptaDestroy(&ptac);
    }

        /* Are any classes bad?  If so, destroy the recog and
         * return an error */
    if (failed) {
        recogDestroy(precog);
        return ERROR_INT("at least 1 bad class; destroying recog",
                         procName, 1);
    }

        /* Get the range of sizes of the unscaled average templates.
         * Reject if the height ratio is too large. */
    pixaSizeRange(recog->pixa_u, &recog->minwidth_u, &recog->minheight_u,
                  &recog->maxwidth_u, &recog->maxheight_u);
    htratio = (l_float32)recog->maxheight_u / (l_float32)recog->minheight_u;
    if (htratio > recog->max_ht_ratio) {
        L_ERROR("ratio of max/min height of average templates = %4.1f;"
                " destroying recog\n", procName, htratio);
        recogDestroy(precog);
        return 1;
    }

        /* Scaled bitmaps: compute averaged bitmap, centroid, and fg area.
         * Same procedure as for the unscaled bitmaps, but without the
         * explicit test for an empty class. */
    for (iclass = 0; iclass < nclasses && !failed; iclass++) {
        pixac = pixaaGetPixa(recog->pixaa, iclass, L_CLONE);
        ptac = ptaaGetPta(recog->ptaa, iclass, L_CLONE);
        nsamp = pixaGetCount(pixac);
        nsamp = L_MIN(nsamp, 256);  /* we only use the first 256 */
        pixaAccumulateSamples(pixac, ptac, &pixacc, &xcen, &ycen);
        pixbin = pixThresholdToBinary(pixacc, L_MAX(1, nsamp / 2));
        pixInvert(pixbin, pixbin);
        pixClipToForeground(pixbin, &pixcrop, &fgbox);
        if (fgbox) {
            boxGetGeometry(fgbox, &xoff, &yoff, NULL, NULL);
            pixaAddPix(recog->pixa, pixcrop, L_INSERT);
            ptaAddPt(recog->pta, xcen - xoff, ycen - yoff);
            pixCountPixels(pixcrop, &fgarea, recog->sumtab);
            numaAddNumber(recog->nasum, fgarea);  /* foreground */
            boxDestroy(&fgbox);
        } else {
            L_ERROR("no fg pixels in average for sclass %d\n",
                    procName, iclass);
            failed = TRUE;
        }
        pixDestroy(&pixacc);
        pixDestroy(&pixbin);
        pixaDestroy(&pixac);
        ptaDestroy(&ptac);
    }

    if (failed) {
        recogDestroy(precog);
        return ERROR_INT("at least 1 bad class; destroying recog",
                         procName, 1);
    }

        /* Get the range of widths of the scaled average templates */
    pixaSizeRange(recog->pixa, &recog->minwidth, NULL, &recog->maxwidth, NULL);

        /* Get dimensions useful for splitting */
    recog->min_splitw = L_MAX(5, recog->minwidth_u - 5);
    recog->max_splith = recog->maxheight_u + 12;  /* allow for skew */

    if (debug)
        recogShowAverageTemplates(recog);

    recog->ave_done = TRUE;
    return 0;
}


/*!
* \brief pixaAccumulateSamples() * * \param[in] pixa of samples from the same class, 1 bpp * \param[in] pta [optional] of centroids of the samples * \param[out] ppixd accumulated samples, 8 bpp * \param[out] px [optional] average x coordinate of centroids * \param[out] py [optional] average y coordinate of centroids * \return 0 on success, 1 on failure * *

 * Notes:
 *      (1) This generates an aligned (by centroid) sum of the input pix.
 *      (2) We use only the first 256 samples; that's plenty.
 *      (3) If pta is not input, we generate two tables, and discard
 *          after use.  If this is called many times, it is better
 *          to precompute the pta.
 *

*/
l_int32
pixaAccumulateSamples(PIXA       *pixa,
                      PTA        *pta,
                      PIX       **ppixd,
                      l_float32  *px,
                      l_float32  *py)
{
l_int32    i, n, maxw, maxh, xdiff, ydiff;
l_int32   *centtab, *sumtab;
l_float32  xc, yc, xave, yave;
PIX       *pix1, *pix2, *pixsum;
PTA       *ptac;

    PROCNAME("pixaAccumulateSamples");

    if (px) *px = 0;
    if (py) *py = 0;
    if (!ppixd)
        return ERROR_INT("&pixd not defined", procName, 1);
    *ppixd = NULL;
    if (!pixa)
        return ERROR_INT("pixa not defined", procName, 1);

    n = pixaGetCount(pixa);
    if (pta && ptaGetCount(pta) != n)
        return ERROR_INT("pta count differs from pixa count", procName, 1);
    n = L_MIN(n, 256);  /* take the first 256 only */
    if (n == 0)
        return ERROR_INT("pixa array empty", procName, 1);

        /* Find the centroids.  Either way, ptac is the array that
         * holds them from here on. */
    if (pta) {
        ptac = ptaClone(pta);
    } else {  /* generate them here */
        ptac = ptaCreate(n);
        centtab = makePixelCentroidTab8();
        sumtab = makePixelSumTab8();
        for (i = 0; i < n; i++) {
            pix1 = pixaGetPix(pixa, i, L_CLONE);
            pixCentroid(pix1, centtab, sumtab, &xc, &yc);
            ptaAddPt(ptac, xc, yc);
            pixDestroy(&pix1);  /* release the clone taken above */
        }
        LEPT_FREE(centtab);
        LEPT_FREE(sumtab);
    }

        /* Find the average value of the centroids.  This must read
         * ptac, not pta: pta is null when the centroids were
         * generated here. */
    xave = yave = 0;
    for (i = 0; i < n; i++) {
        ptaGetPt(ptac, i, &xc, &yc);
        xave += xc;
        yave += yc;
    }
    xave = xave / (l_float32)n;
    yave = yave / (l_float32)n;
    if (px) *px = xave;
    if (py) *py = yave;

        /* Place all pix with their centroids located at the average
         * centroid value, and sum the results.  Make the accumulator
         * image slightly larger than the largest sample to insure
         * that all pixels are represented in the accumulator. */
    pixaSizeRange(pixa, NULL, NULL, &maxw, &maxh);
    pixsum = pixInitAccumulate(maxw + 5, maxh + 5, 0);
    pix1 = pixCreate(maxw, maxh, 1);
    if (!pixsum || !pix1) {
        pixDestroy(&pix1);
        pixDestroy(&pixsum);
        ptaDestroy(&ptac);
        return ERROR_INT("accumulator images not made", procName, 1);
    }
    for (i = 0; i < n; i++) {
        pix2 = pixaGetPix(pixa, i, L_CLONE);
        ptaGetPt(ptac, i, &xc, &yc);
        xdiff = (l_int32)(xave - xc);
        ydiff = (l_int32)(yave - yc);
            /* pix1 is a scratch image: clear it, paste the sample
             * shifted by (xdiff, ydiff), and add it to the sum */
        pixClearAll(pix1);
        pixRasterop(pix1, xdiff, ydiff, maxw, maxh, PIX_SRC,
                    pix2, 0, 0);
        pixAccumulate(pixsum, pix1, L_ARITH_ADD);
        pixDestroy(&pix2);
    }
    *ppixd = pixFinalAccumulate(pixsum, 0, 8);

    pixDestroy(&pix1);
    pixDestroy(&pixsum);
    ptaDestroy(&ptac);
    return 0;
}


/*!
 * \brief   recogTrainingFinished()
 *
 * \param[in]    precog       addr of recog
 * \param[in]    modifyflag   1 to use recogModifyTemplate(); 0 otherwise
 * \param[in]    minsize      set to -1 for default
 * \param[in]    minfract     set to -1.0 for default
 * \return  0 if OK, 1 on error (input recog will be destroyed)
 *
 *

 * Notes:
 *      (1) This must be called after all training samples have been added.
 *      (2) If the templates are not good enough, the recog input is destroyed.
 *      (3) Usually, %modifyflag == 1, because we want to apply
 *          recogModifyTemplate() to generate the actual templates
 *          that will be used.  The one exception is when reading a
 *          serialized recog: there we want to put the same set of
 *          templates in both the unscaled and modified pixaa.
 *          See recogReadStream() to see why we do this.
 *      (4) See recogTemplatesAreOK() for %minsize and %minfract usage.
 *      (5) The following things are done here:
 *          (a) Allocate (or reallocate) storage for (possibly) modified
 *              bitmaps, centroids, and fg areas.
 *          (b) Generate the (possibly) modified bitmaps.
 *          (c) Compute centroid and fg area data for both unscaled and
 *              modified bitmaps.
 *          (d) Truncate the pixaa, ptaa and numaa arrays down from
 *              256 to the actual size.
 *      (6) Putting these operations here makes it simple to recompute
 *          the recog with different modifications on the bitmaps.
 *      (7) Call recogShowContent() to display the templates, both
 *          unscaled and modified.
 *

*/
l_ok
recogTrainingFinished(L_RECOG  **precog,
                      l_int32    modifyflag,
                      l_int32    minsize,
                      l_float32  minfract)
{
l_int32    valid, iclass, isamp, arraysize, nclasses, nsamples, fgarea;
l_float32  xcen, ycen;
PIX       *pixu, *pixm;
PIXA      *pixac;
PIXAA     *paa;
PTA       *ptaempty;
PTAA      *ptaa;
L_RECOG   *recog;

    PROCNAME("recogTrainingFinished");

    if (!precog)
        return ERROR_INT("&recog not defined", procName, 1);
    recog = *precog;
    if (recog == NULL)
        return ERROR_INT("recog not defined", procName, 1);
    if (recog->train_done)
        return 0;

        /* Test the input templates */
    recogTemplatesAreOK(recog, minsize, minfract, &valid);
    if (!valid) {
        recogDestroy(precog);
        return ERROR_INT("bad templates", procName, 1);
    }

        /* Generate the storage for the possibly-scaled training bitmaps */
    arraysize = recog->maxarraysize;
    paa = pixaaCreate(arraysize);
    pixac = pixaCreate(1);
    pixaaInitFull(paa, pixac);
    pixaDestroy(&pixac);
    pixaaDestroy(&recog->pixaa);
    recog->pixaa = paa;

        /* Generate the storage for the unscaled centroid training data.
         * The same empty pta is also used to initialize the
         * possibly-scaled centroid array below. */
    ptaa = ptaaCreate(arraysize);
    ptaempty = ptaCreate(0);
    ptaaInitFull(ptaa, ptaempty);
    ptaaDestroy(&recog->ptaa_u);
    recog->ptaa_u = ptaa;

        /* Generate the storage for the possibly-scaled centroid data */
    ptaa = ptaaCreate(arraysize);
    ptaaInitFull(ptaa, ptaempty);
    ptaDestroy(&ptaempty);
    ptaaDestroy(&recog->ptaa);
    recog->ptaa = ptaa;

        /* Generate the storage for the fg area data */
    numaaDestroy(&recog->naasum_u);
    numaaDestroy(&recog->naasum);
    recog->naasum_u = numaaCreateFull(arraysize, 0);
    recog->naasum = numaaCreateFull(arraysize, 0);

    paa = recog->pixaa_u;
    nclasses = recog->setsize;
    for (iclass = 0; iclass < nclasses; iclass++) {
        pixac = pixaaGetPixa(paa, iclass, L_CLONE);
        nsamples = pixaGetCount(pixac);
        for (isamp = 0; isamp < nsamples; isamp++) {
                /* Save centroid and area data for the unscaled pix */
            pixu = pixaGetPix(pixac, isamp, L_CLONE);
            pixCentroid(pixu, recog->centtab, recog->sumtab, &xcen, &ycen);
            ptaaAddPt(recog->ptaa_u, iclass, xcen, ycen);
            pixCountPixels(pixu, &fgarea, recog->sumtab);
            numaaAddNumber(recog->naasum_u, iclass, fgarea);  /* foreground */

                /* Insert the (optionally) scaled character image, and
                 * save centroid and area data for it.  If the modified
                 * template could not be made, only the unscaled data
                 * is recorded for this sample. */
            pixm = (modifyflag == 1) ? recogModifyTemplate(recog, pixu)
                                     : pixClone(pixu);
            if (!pixm) {
                L_ERROR("failed: modified template for class %d, sample %d\n",
                        procName, iclass, isamp);
            } else {
                pixaaAddPix(recog->pixaa, iclass, pixm, NULL, L_INSERT);
                pixCentroid(pixm, recog->centtab, recog->sumtab,
                            &xcen, &ycen);
                ptaaAddPt(recog->ptaa, iclass, xcen, ycen);
                pixCountPixels(pixm, &fgarea, recog->sumtab);
                numaaAddNumber(recog->naasum, iclass, fgarea);
            }
            pixDestroy(&pixu);
        }
        pixaDestroy(&pixac);
    }

        /* Truncate the arrays to those with non-empty containers */
    pixaaTruncate(recog->pixaa_u);
    pixaaTruncate(recog->pixaa);
    ptaaTruncate(recog->ptaa_u);
    ptaaTruncate(recog->ptaa);
    numaaTruncate(recog->naasum_u);
    numaaTruncate(recog->naasum);

    recog->train_done = TRUE;
    return 0;
}


/*!
 * \brief   recogTemplatesAreOK()
 *
 * \param[in]    recog
 * \param[in]    minsize     set to -1 for default
 * \param[in]    minfract    set to -1.0 for default
 * \param[out]   pok         set to 1 if template set is valid; 0 otherwise
 * \return  1 on error; 0 otherwise.  An invalid template set is not an error.
 *
 *

 * Notes:
 *      (1) This is called by recogTrainingFinished().  An output value
 *          %pok == 0 will cause recogTrainingFinished() to destroy the recog.
 *      (2) %minsize is the minimum number of samples required for
 *          the class; -1 uses the default
 *      (3) %minfract is the minimum fraction of classes required for
 *          the recog to be usable; -1.0 uses the default
 *

*/
static l_int32
recogTemplatesAreOK(L_RECOG   *recog,
                    l_int32    minsize,
                    l_float32  minfract,
                    l_int32   *pok)
{
l_int32    iclass, nclasses, nvalid, nsamp;
l_float32  fract;
NUMA      *nacount;

    PROCNAME("recogTemplatesAreOK");

    if (!pok)
        return ERROR_INT("&ok not defined", procName, 1);
    *pok = 0;
    if (!recog)
        return ERROR_INT("recog not defined", procName, 1);

        /* Negative inputs select the file-level defaults */
    if (minsize < 0)
        minsize = DefaultMinSetSize;
    if (minfract < 0)
        minfract = DefaultMinSetFract;

        /* Count the classes that hold at least %minsize samples */
    nclasses = pixaaGetCount(recog->pixaa_u, &nacount);
    nvalid = 0;
    for (iclass = 0; iclass < nclasses; iclass++) {
        numaGetIValue(nacount, iclass, &nsamp);
        if (nsamp >= minsize)
            nvalid++;
    }
    numaDestroy(&nacount);

        /* The set is valid if the populated classes make up at least
         * %minfract of the full character set */
    fract = (l_float32)nvalid / (l_float32)recog->charset_size;
    if (fract >= minfract)
        *pok = 1;
    return 0;
}


/*!
 * \brief   recogFilterPixaBySize()
 *
 * \param[in]    pixas          labeled templates
 * \param[in]    setsize        size of character set (number of classes)
 * \param[in]    maxkeep        max number of templates to keep in a class
 * \param[in]    max_ht_ratio   max allowed height ratio (see below)
 * \param[out]   pna            [optional] debug output, giving the number
 *                              in each class after filtering; use NULL to skip
 * \return  pixa   filtered templates, or NULL on error
 *
 *

 * Notes:
 *      (1) The basic assumption is that the most common and larger
 *          templates in each class are more likely to represent the
 *          characters we are interested in.  For example, larger digits
 *          are more likely to represent page numbers, and smaller digits
 *          could be data in tables.  Therefore, we bias the first
 *          stage of filtering toward the larger characters by removing
 *          very small ones, and select based on proximity of the
 *          remaining characters to median height.
 *      (2) For each of the %setsize classes, order the templates
 *          increasingly by height.  Take the rank 0.9 height.  Eliminate
 *          all templates that are shorter by more than %max_ht_ratio.
 *          Of the remaining ones, select up to %maxkeep that are closest
 *          in rank order height to the median template.
 *

*/
PIXA *
recogFilterPixaBySize(PIXA      *pixas,
                      l_int32    setsize,
                      l_int32    maxkeep,
                      l_float32  max_ht_ratio,
                      NUMA     **pna)
{
l_int32    iclass, k, nclasses, count, rank90, href, hcur, first, last;
l_float32  htratio;
NUMA      *nakept;
PIXA      *pixaclass, *pixasorted, *pixatall, *pixakeep, *pixad;
PIXAA     *paa;

    PROCNAME("recogFilterPixaBySize");

    if (pna) *pna = NULL;
    if (!pixas)
        return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);

        /* Group the templates by class */
    paa = recogSortPixaByClass(pixas, setsize);
    if (paa == NULL)
        return (PIXA *)ERROR_PTR("paa not made", procName, NULL);
    nclasses = pixaaGetCount(paa, NULL);

        /* Per-class counts are only collected when requested */
    nakept = NULL;
    if (pna) {
        nakept = numaCreate(0);
        *pna = nakept;
    }

    pixad = pixaCreate(0);
    for (iclass = 0; iclass < nclasses; iclass++) {
        pixaclass = pixaaGetPixa(paa, iclass, L_CLONE);
        count = pixaGetCount(pixaclass);
        if (count == 0) {  /* nothing in this class */
            pixaDestroy(&pixaclass);
            continue;
        }

            /* Order by increasing height and take the height of the
             * template at rank 0.9 as the reference */
        pixasorted = pixaSort(pixaclass, L_SORT_BY_HEIGHT, L_SORT_INCREASING,
                              NULL, L_COPY);
        rank90 = (l_int32)(0.9 * count);
        pixaGetPixDimensions(pixasorted, rank90, NULL, &href, NULL);

            /* Keep templates whose height ratio to the reference
             * does not exceed %max_ht_ratio */
        pixatall = pixaCreate(count);
        for (k = 0; k < count; k++) {
            pixaGetPixDimensions(pixasorted, k, NULL, &hcur, NULL);
            htratio = (l_float32)href / (l_float32)hcur;
            if (htratio <= max_ht_ratio) {
                pixaAddPix(pixatall, pixaGetPix(pixasorted, k, L_COPY),
                           L_INSERT);
            }
        }

            /* If too many remain, take the central %maxkeep of them */
        count = pixaGetCount(pixatall);
        if (count > maxkeep) {
            first = (count - maxkeep) / 2;
            last = first + maxkeep - 1;
            pixakeep = pixaSelectRange(pixatall, first, last, L_CLONE);
        } else {
            pixakeep = pixaCopy(pixatall, L_CLONE);
        }

        if (nakept)
            numaAddNumber(nakept, pixaGetCount(pixakeep));
        pixaJoin(pixad, pixakeep, 0, -1);

        pixaDestroy(&pixaclass);
        pixaDestroy(&pixasorted);
        pixaDestroy(&pixatall);
        pixaDestroy(&pixakeep);
    }

    pixaaDestroy(&paa);
    return pixad;
}


/*!
* \brief   recogSortPixaByClass()
 *
 * \param[in]    pixa       labeled templates
 * \param[in]    setsize    size of character set (number of classes)
 * \return  paa   pixaa where each pixa has templates for one class,
 *                or null on error
 */
PIXAA *
recogSortPixaByClass(PIXA    *pixa,
                     l_int32  setsize)
{
PIXAA    *paa_unscaled;
L_RECOG  *recogtmp;

    PROCNAME("recogSortPixaByClass");

    if (!pixa)
        return (PIXAA *)ERROR_PTR("pixa not defined", procName, NULL);

        /* Build a throwaway recog from the templates; %setsize is not
         * referenced in this function body. */
    recogtmp = recogCreateFromPixaNoFinish(pixa, 0, 0, 0, 0, 0);
    if (recogtmp == NULL)
        return (PIXAA *)ERROR_PTR("recog not made", procName, NULL);

        /* Take the array of unscaled templates out of the recog and
         * clear the field before the recog is destroyed, so that the
         * returned array is no longer referenced by it. */
    paa_unscaled = recogtmp->pixaa_u;
    recogtmp->pixaa_u = NULL;
    recogDestroy(&recogtmp);
    return paa_unscaled;
}


/*!
 * \brief   recogRemoveOutliers1()
 *
 * \param[in]    precog     addr of recog with unscaled labeled templates
 * \param[in]    minscore   keep everything with at least this score
 * \param[in]    mintarget  minimum desired number to retain if possible
 * \param[in]    minsize    minimum number of samples required for a class
 * \param[out]   ppixsave   [optional debug] saved templates, with scores
 * \param[out]   ppixrem    [optional debug] removed templates, with scores
 * \return  0 if OK, 1 on error.
 *
 *

 * Notes:
 *      (1) This is a convenience wrapper when using default parameters
 *          for the recog.  See pixaRemoveOutliers1() for details.
 *      (2) If this succeeds, the new recog replaces the input recog;
 *          if it fails, the input recog is destroyed.
 *

*/
l_ok
recogRemoveOutliers1(L_RECOG  **precog,
                     l_float32  minscore,
                     l_int32    mintarget,
                     l_int32    minsize,
                     PIX      **ppixsave,
                     PIX      **ppixrem)
{
PIXA     *pixaall, *pixakept;
L_RECOG  *recognew;

    PROCNAME("recogRemoveOutliers1");

    if (precog == NULL)
        return ERROR_INT("&recog not defined", procName, 1);
    if (*precog == NULL)
        return ERROR_INT("recog not defined", procName, 1);

        /* Pull out the unscaled templates; the input recog is destroyed
         * here, before the outcome of the filtering is known. */
    pixaall = recogExtractPixa(*precog);
    recogDestroy(precog);

        /* Filter the templates */
    pixakept = pixaRemoveOutliers1(pixaall, minscore, mintarget, minsize,
                                   ppixsave, ppixrem);
    pixaDestroy(&pixaall);
    if (pixakept == NULL)
        return ERROR_INT("failure to remove outliers", procName, 1);

        /* Build the replacement recog from the surviving templates */
    recognew = recogCreateFromPixa(pixakept, 0, 0, 0, 150, 1);
    pixaDestroy(&pixakept);
    if (recognew == NULL)
        return ERROR_INT("failure to make recog from pixa sans outliers",
                         procName, 1);

    *precog = recognew;
    return 0;
}


/*!
 * \brief   pixaRemoveOutliers1()
 *
 * \param[in]    pixas      unscaled labeled templates
 * \param[in]    minscore   keep everything with at least this score;
 *                          use -1.0 for default.
 * \param[in]    mintarget  minimum desired number to retain if possible;
 *                          use -1 for default.
 * \param[in]    minsize    minimum number of samples required for a class;
 *                          use -1 for default.
 * \param[out]   ppixsave   [optional debug] saved templates, with scores
 * \param[out]   ppixrem    [optional debug] removed templates, with scores
 * \return  pixa   of unscaled templates to be kept, or NULL on error
 *
 *

 * Notes:
 *      (1) Removing outliers is particularly important when recognition
 *          goes against all the samples in the training set, as opposed
 *          to the averages for each class.  The reason is that we get
 *          an identification error if a mislabeled template is a best
 *          match for an input sample.
 *      (2) Because the score values depend strongly on the quality
 *          of the character images, to avoid losing too many samples
 *          we supplement a minimum score for retention with a score
 *          necessary to acquire the minimum target number of templates.
 *          To do this we are willing to use a lower threshold,
 *          LowerScoreThreshold, on the score.  Consequently, with
 *          poor quality templates, we may keep samples with a score
 *          less than %minscore, but never less than LowerScoreThreshold.
 *          And if the number of samples is less than %minsize, we do
 *          not use any.
 *      (3) This is meant to be used on a BAR, where the templates all
 *          come from the same book; use minscore ~0.75.
 *      (4) Method: make a scaled recog from the input %pixas.  Then,
 *          for each class: generate the averages, match each
 *          scaled template against the average, and save unscaled
 *          templates that had a sufficiently good match.
 *

*/
PIXA *
pixaRemoveOutliers1(PIXA      *pixas,
                    l_float32  minscore,
                    l_int32    mintarget,
                    l_int32    minsize,
                    PIX      **ppixsave,
                    PIX      **ppixrem)
{
l_int32    i, j, debug, n, area1, area2;
l_float32  x1, y1, x2, y2, minfract, score, rankscore, threshscore;
NUMA      *nasum, *narem, *nasave, *nascore;
PIX       *pix1, *pix2;
PIXA      *pixa, *pixarem, *pixad;
PTA       *pta;
L_RECOG   *recog;

    PROCNAME("pixaRemoveOutliers1");

    if (ppixsave) *ppixsave = NULL;
    if (ppixrem) *ppixrem = NULL;
    if (!pixas)
        return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);

        /* Clamp the inputs; non-positive (or negative, for %minsize)
         * values select the file-level defaults. */
    minscore = L_MIN(minscore, 1.0);
    if (minscore <= 0.0)
        minscore = DefaultMinScore;
    mintarget = L_MIN(mintarget, 3);
    if (mintarget <= 0)
        mintarget = DefaultMinTarget;
    if (minsize < 0)
        minsize = DefaultMinSetSize;

        /* Make a special height-scaled recognizer with average templates */
    debug = (ppixsave || ppixrem) ? 1 : 0;
    recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1);
    if (!recog)
        return (PIXA *)ERROR_PTR("bad pixas; recog not made", procName, NULL);
    recogAverageSamples(&recog, debug);
    if (!recog)
        return (PIXA *)ERROR_PTR("bad templates", procName, NULL);

        /* The debug accumulators exist only for the outputs that were
         * actually requested: nasave for %ppixsave; pixarem and narem
         * for %ppixrem. */
    nasave = (ppixsave) ? numaCreate(0) : NULL;
    pixarem = (ppixrem) ? pixaCreate(0) : NULL;
    narem = (ppixrem) ? numaCreate(0) : NULL;
    pixad = pixaCreate(0);
    for (i = 0; i < recog->setsize; i++) {
            /* Access the average template and values for scaled
             * images in this class */
        pix1 = pixaGetPix(recog->pixa, i, L_CLONE);
        ptaGetPt(recog->pta, i, &x1, &y1);
        numaGetIValue(recog->nasum, i, &area1);

            /* Get the scores for each sample in the class */
        pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE);
        pta = ptaaGetPta(recog->ptaa, i, L_CLONE);  /* centroids */
        nasum = numaaGetNuma(recog->naasum, i, L_CLONE);  /* fg areas */
        n = pixaGetCount(pixa);
        nascore = numaCreate(n);
        for (j = 0; j < n; j++) {
            pix2 = pixaGetPix(pixa, j, L_CLONE);
            ptaGetPt(pta, j, &x2, &y2);  /* centroid of this sample */
            numaGetIValue(nasum, j, &area2);  /* fg sum of this sample */
            pixCorrelationScoreSimple(pix1, pix2, area1, area2,
                                      x1 - x2, y1 - y2, 5, 5,
                                      recog->sumtab, &score);
            numaAddNumber(nascore, score);
            if (debug && score == 0.0)  /* typ. large size difference */
                fprintf(stderr, "Got 0 score for i = %d, j = %d\n", i, j);
            pixDestroy(&pix2);
        }
        pixDestroy(&pix1);

            /* Find the rankscore, corresponding to the rank
             * 1.0 - minfract, where minfract = mintarget / n.
             * To attempt to retain %mintarget templates, use as the
             * cutoff the minimum of minscore and the rank score.
             * However, the cutoff is never allowed to go below
             * LowerScoreThreshold, so no template with a lower score
             * than that is saved. */
        minfract = (l_float32)mintarget / (l_float32)n;
        numaGetRankValue(nascore, 1.0 - minfract, NULL, 0, &rankscore);
        threshscore = L_MAX(LowerScoreThreshold,
                            L_MIN(minscore, rankscore));
        if (debug) {
            L_INFO("minscore = %4.2f, rankscore = %4.2f, threshscore = %4.2f\n",
                   procName, minscore, rankscore, threshscore);
        }

            /* Save templates that are at or above threshold.
             * Toss any classes with less than %minsize templates.
             * Rejected templates are handed to pixarem only when that
             * array exists (i.e., %ppixrem was requested).  Testing
             * %debug here is not sufficient: debug is also set when
             * only %ppixsave is requested, in which case pixarem and
             * narem are NULL and the rejected copy must be destroyed
             * here instead of being passed to a NULL array. */
        for (j = 0; j < n; j++) {
            numaGetFValue(nascore, j, &score);
            pix1 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY);
            if (score >= threshscore && n >= minsize) {
                pixaAddPix(pixad, pix1, L_INSERT);
                if (nasave) numaAddNumber(nasave, score);
            } else if (pixarem) {
                pixaAddPix(pixarem, pix1, L_INSERT);
                numaAddNumber(narem, score);
            } else {
                pixDestroy(&pix1);
            }
        }

        pixaDestroy(&pixa);
        ptaDestroy(&pta);
        numaDestroy(&nasum);
        numaDestroy(&nascore);
    }

    if (ppixsave) {
        *ppixsave = pixDisplayOutliers(pixad, nasave);
        numaDestroy(&nasave);
    }
    if (ppixrem) {
        *ppixrem = pixDisplayOutliers(pixarem, narem);
        pixaDestroy(&pixarem);
        numaDestroy(&narem);
    }
    recogDestroy(&recog);
    return pixad;
}


/*!
 * \brief   recogRemoveOutliers2()
 *
 * \param[in]    precog     addr of recog with unscaled labeled templates
 * \param[in]    minscore   keep everything with at least this score
 * \param[in]    minsize    minimum number of samples required for a class
 * \param[out]   ppixsave   [optional debug] saved templates, with scores
 * \param[out]   ppixrem    [optional debug] removed templates, with scores
 * \return  0 if OK, 1 on error.
 *
 *

 * Notes:
 *      (1) This is a convenience wrapper when using default parameters
 *          for the recog.  See pixaRemoveOutliers2() for details.
 *      (2) If this succeeds, the new recog replaces the input recog;
 *          if it fails, the input recog is destroyed.
 *

*/
l_ok
recogRemoveOutliers2(L_RECOG  **precog,
                     l_float32  minscore,
                     l_int32    minsize,
                     PIX      **ppixsave,
                     PIX      **ppixrem)
{
PIXA     *pixaall, *pixakept;
L_RECOG  *recognew;

    PROCNAME("recogRemoveOutliers2");

    if (precog == NULL)
        return ERROR_INT("&recog not defined", procName, 1);
    if (*precog == NULL)
        return ERROR_INT("recog not defined", procName, 1);

        /* Pull out the unscaled templates; the input recog is destroyed
         * here, before the outcome of the filtering is known. */
    pixaall = recogExtractPixa(*precog);
    recogDestroy(precog);

        /* Filter the templates */
    pixakept = pixaRemoveOutliers2(pixaall, minscore, minsize,
                                   ppixsave, ppixrem);
    pixaDestroy(&pixaall);
    if (pixakept == NULL)
        return ERROR_INT("failure to remove outliers", procName, 1);

        /* Build the replacement recog from the surviving templates */
    recognew = recogCreateFromPixa(pixakept, 0, 0, 0, 150, 1);
    pixaDestroy(&pixakept);
    if (recognew == NULL)
        return ERROR_INT("failure to make recog from pixa sans outliers",
                         procName, 1);

    *precog = recognew;
    return 0;
}


/*!
 * \brief   pixaRemoveOutliers2()
 *
 * \param[in]    pixas      unscaled labeled templates
 * \param[in]    minscore   keep everything with at least this score;
 *                          use -1.0 for default.
 * \param[in]    minsize    minimum number of samples required for a class;
 *                          use -1 for default.
 * \param[out]   ppixsave   [optional debug] saved templates, with scores
 * \param[out]   ppixrem    [optional debug] removed templates, with scores
 * \return  pixa   of unscaled templates to be kept, or NULL on error
 *
 *

 * Notes:
 *      (1) Removing outliers is particularly important when recognition
 *          goes against all the samples in the training set, as opposed
 *          to the averages for each class.  The reason is that we get
 *          an identification error if a mislabeled template is a best
 *          match for an input sample.
 *      (2) This method compares each template against the average templates
 *          of each class, and discards any template that has a higher
 *          correlation to a class different from its own.  It also
 *          sets a lower bound on correlation scores with its class average.
 *      (3) This is meant to be used on a BAR, where the templates all
 *          come from the same book; use minscore ~0.75.
 *

*/
PIXA *
pixaRemoveOutliers2(PIXA      *pixas,
                    l_float32  minscore,
                    l_int32    minsize,
                    PIX      **ppixsave,
                    PIX      **ppixrem)
{
l_int32    iclass, isamp, iave, nsamp, areasamp, areaave, bestclass, debug;
l_float32  xsamp, ysamp, xave, yave, score, bestscore;
NUMA      *nacounts, *nascore, *nasave;
PIX       *pixsamp, *pixave, *pixout;
PIXA      *pixarem, *pixad;
L_RECOG   *recog;

    PROCNAME("pixaRemoveOutliers2");

    if (ppixsave) *ppixsave = NULL;
    if (ppixrem) *ppixrem = NULL;
    if (!pixas)
        return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);

        /* Clamp the inputs; out-of-range values select the defaults */
    minscore = L_MIN(minscore, 1.0);
    if (minscore <= 0.0)
        minscore = DefaultMinScore;
    if (minsize < 0)
        minsize = DefaultMinSetSize;

        /* Build a height-scaled recognizer and its class averages */
    debug = (ppixsave || ppixrem) ? 1 : 0;
    recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1);
    if (recog == NULL)
        return (PIXA *)ERROR_PTR("bad pixas; recog not made", procName, NULL);
    recogAverageSamples(&recog, debug);
    if (recog == NULL)
        return (PIXA *)ERROR_PTR("bad templates", procName, NULL);

    nasave = NULL;
    if (ppixsave)
        nasave = numaCreate(0);
    pixarem = NULL;
    if (ppixrem)
        pixarem = pixaCreate(0);
    pixad = pixaCreate(0);

        /* nacounts holds the number of templates in each class */
    pixaaGetCount(recog->pixaa, &nacounts);
    for (iclass = 0; iclass < recog->setsize; iclass++) {
        numaGetIValue(nacounts, iclass, &nsamp);
        for (isamp = 0; isamp < nsamp; isamp++) {
                /* The scaled sample, with its centroid and fg sum */
            pixsamp = pixaaGetPix(recog->pixaa, iclass, isamp, L_CLONE);
            ptaaGetPt(recog->ptaa, iclass, isamp, &xsamp, &ysamp);
            numaaGetValue(recog->naasum, iclass, isamp, NULL, &areasamp);

                /* Score this sample against the average template
                 * of every class */
            nascore = numaCreate(nsamp);
            for (iave = 0; iave < recog->setsize; iave++) {
                pixave = pixaGetPix(recog->pixa, iave, L_CLONE);
                ptaGetPt(recog->pta, iave, &xave, &yave);
                numaGetIValue(recog->nasum, iave, &areaave);
                pixCorrelationScoreSimple(pixsamp, pixave, areasamp, areaave,
                                          xsamp - xave, ysamp - yave, 5, 5,
                                          recog->sumtab, &score);
                numaAddNumber(nascore, score);
                pixDestroy(&pixave);
            }

                /* Keep the sample only if its best match is its own
                 * class, the score reaches %minscore, and the class
                 * has at least %minsize templates.  Otherwise it is
                 * an outlier, recorded only if %ppixrem was given. */
            numaGetMax(nascore, &bestscore, &bestclass);
            if (bestclass == iclass && bestscore >= minscore &&
                nsamp >= minsize) {
                pixout = pixaaGetPix(recog->pixaa_u, iclass, isamp, L_COPY);
                pixaAddPix(pixad, pixout, L_INSERT);
                if (nasave)
                    numaAddNumber(nasave, bestscore);
            } else if (ppixrem) {
                pixout = recogDisplayOutlier(recog, iclass, isamp,
                                             bestclass, bestscore);
                pixaAddPix(pixarem, pixout, L_INSERT);
            }

            numaDestroy(&nascore);
            pixDestroy(&pixsamp);
        }
    }

    if (ppixsave) {
        *ppixsave = pixDisplayOutliers(pixad, nasave);
        numaDestroy(&nasave);
    }
    if (ppixrem) {
        *ppixrem = pixaDisplayTiledInRows(pixarem, 32, 1500, 1.0, 0, 20, 2);
        pixaDestroy(&pixarem);
    }

    numaDestroy(&nacounts);
    recogDestroy(&recog);
    return pixad;
}


/*------------------------------------------------------------------------*
 *                       Training on unlabeled data                       *
 *------------------------------------------------------------------------*/
/*!
 * \brief   recogTrainFromBoot()
 *
 * \param[in]    recogboot  labeled boot recognizer
 * \param[in]    pixas      set of unlabeled input characters
 * \param[in]    minscore   min score for accepting the example; e.g., 0.75
 * \param[in]    threshold  for binarization, if needed
 * \param[in]    debug      1 for debug output saved to recogboot; 0 otherwise
 * \return  pixad   labeled version of input pixas, trained on a BSR,
 *                  or NULL on error
 *
 *

 * Notes:
 *      (1) This takes %pixas of unscaled single characters and %recogboot,
 *          a bootstrap recognizer (BSR) that has been set up with parameters
 *            * scaleh: scale all templates to this height
 *            * linew: width of normalized strokes, or 0 if using
 *              the input image
 *          It modifies the pix in %pixas accordingly and correlates
 *          with the templates in the BSR.  It returns those input
 *          images in %pixas whose best correlation with the BSR is at
 *          or above %minscore.  The returned pix have added text labels
 *          for the text string of the class to which the best
 *          correlated template belongs.
 *      (2) Identification occurs in scaled mode (typically with h = 40),
 *          optionally using width-normalized line images derived
 *          from those in %pixas.
 *

*/
PIXA *
recogTrainFromBoot(L_RECOG   *recogboot,
                   PIXA      *pixas,
                   l_float32  minscore,
                   l_int32    threshold,
                   l_int32    debug)
{
char      *text;
l_int32    i, n, same, maxd, scaleh, linew;
l_float32  score;
PIX       *pix1, *pix2, *pixdb;
PIXA      *pixa1, *pixa2, *pixa3, *pixad;

    PROCNAME("recogTrainFromBoot");

    if (!recogboot)
        return (PIXA *)ERROR_PTR("recogboot not defined", procName, NULL);
    if (!pixas)
        return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);

        /* Make sure all input pix are 1 bpp */
    if ((n = pixaGetCount(pixas)) == 0)
        return (PIXA *)ERROR_PTR("no pix in pixa", procName, NULL);
    pixaVerifyDepth(pixas, &same, &maxd);
    if (maxd == 1) {
        pixa1 = pixaCopy(pixas, L_COPY);
    } else {
        pixa1 = pixaCreate(n);
        for (i = 0; i < n; i++) {
            pix1 = pixaGetPix(pixas, i, L_CLONE);
            pix2 = pixConvertTo1(pix1, threshold);
            pixaAddPix(pixa1, pix2, L_INSERT);
            pixDestroy(&pix1);
        }
    }

        /* Scale the input images to match the BSR */
    scaleh = recogboot->scaleh;
    linew = recogboot->linew;
    pixa2 = pixaCreate(n);
    for (i = 0; i < n; i++) {
        pix1 = pixaGetPix(pixa1, i, L_CLONE);
        pix2 = pixScaleToSize(pix1, 0, scaleh);
        pixaAddPix(pixa2, pix2, L_INSERT);
        pixDestroy(&pix1);
    }
    pixaDestroy(&pixa1);

        /* Optionally convert to width-normalized line */
    if (linew > 0)
        pixa3 = pixaSetStrokeWidth(pixa2, linew, 4, 8);
    else
        pixa3 = pixaCopy(pixa2, L_CLONE);
    pixaDestroy(&pixa2);

        /* Identify using recogboot.  The debug image pixdb is only
         * produced when %debug is set, so it must only be used in
         * that mode; without %debug it was previously read while
         * uninitialized for every accepted sample. */
    n = pixaGetCount(pixa3);
    pixad = pixaCreate(n);
    pixdb = NULL;
    for (i = 0; i < n; i++) {
        pix1 = pixaGetPix(pixa3, i, L_COPY);
        pixSetText(pix1, NULL);  /* remove any existing text or labelling */
        if (!debug) {
            recogIdentifyPix(recogboot, pix1, NULL);
        } else {
            recogIdentifyPix(recogboot, pix1, &pixdb);
            pixaAddPix(recogboot->pixadb_boot, pixdb, L_INSERT);
        }
        rchExtract(recogboot->rch, NULL, &score, &text, NULL, NULL, NULL, NULL);
        if (score >= minscore) {
                /* Label a copy of the original (unscaled) input */
            pix2 = pixaGetPix(pixas, i, L_COPY);
            pixSetText(pix2, text);
            pixaAddPix(pixad, pix2, L_INSERT);
            if (debug)
                pixaAddPix(recogboot->pixadb_boot, pixdb, L_COPY);
        }
        LEPT_FREE(text);
        pixDestroy(&pix1);
    }
    pixaDestroy(&pixa3);

    return pixad;
}


/*------------------------------------------------------------------------*
 *                     Padding the digit training set                     *
 *------------------------------------------------------------------------*/
/*!
 * \brief   recogPadDigitTrainingSet()
 *
 * \param[in,out]   precog   trained; if padding is needed, it is replaced
 *                           by a new padded recog
 * \param[in]       scaleh   must be > 0; suggest ~40.
 * \param[in]       linew    use 0 for original scanned images
 * \return       0 if OK, 1 on error
 *
 *

 * Notes:
 *      (1) This is a no-op if padding is not needed.  However,
 *          if it is, this replaces the input recog with a new recog,
 *          padded appropriately with templates from a boot recognizer,
 *          and set up with correlation templates derived from
 *          %scaleh and %linew.
 *

*/
l_ok
recogPadDigitTrainingSet(L_RECOG  **precog,
                         l_int32    scaleh,
                         l_int32    linew)
{
PIXA     *pixapadded;
L_RECOG  *recogold, *recognew;
SARRAY   *sapad;

    PROCNAME("recogPadDigitTrainingSet");

    if (precog == NULL)
        return ERROR_INT("&recog not defined", procName, 1);

        /* Find out which classes, if any, need extra templates.
         * A null string array means there is nothing to do. */
    recogold = *precog;
    recogIsPaddingNeeded(recogold, &sapad);
    if (sapad == NULL)
        return 0;

        /* Get a new pixa with the padding templates added */
    pixapadded = recogAddDigitPadTemplates(recogold, sapad);
    sarrayDestroy(&sapad);
    if (pixapadded == NULL)
        return ERROR_INT("pixa not made", procName, 1);

        /* Need to use templates that are scaled to a fixed height. */
    if (scaleh <= 0) {
        L_WARNING("templates must be scaled to fixed height; using %d\n",
                  procName, 40);
        scaleh = 40;
    }

        /* Create a hybrid recog, composed of templates from both
         * the original and bootstrap sources; it takes the place
         * of the input recog, which is destroyed. */
    recognew = recogCreateFromPixa(pixapadded, 0, scaleh, linew,
                                   recogold->threshold, recogold->maxyshift);
    pixaDestroy(&pixapadded);
    recogDestroy(precog);
    *precog = recognew;
    return 0;
}


/*!
 * \brief   recogIsPaddingNeeded()
 *
 * \param[in]    recog   trained
 * \param[out]   psa     addr of returned string containing text value
 * \return       1 on error; 0 if OK, whether or not additional padding
 *               templates are required.
 *
 *

 * Notes:
 *      (1) This returns a string array in %psa containing character values
 *          for which extra templates are needed; this sarray is
 *          used by recogAddDigitPadTemplates().  It returns NULL
 *          if no padding templates are needed.
 *

*/
l_int32
recogIsPaddingNeeded(L_RECOG  *recog,
                     SARRAY  **psa)
{
char      *label;
l_int32    iclass, count, needed, nclasses, complete;
l_float32  mincount;
NUMA      *nacounts;
SARRAY    *sapad;

    PROCNAME("recogIsPaddingNeeded");

    if (!psa)
        return ERROR_INT("&sa not defined", procName, 1);
    *psa = NULL;
    if (!recog)
        return ERROR_INT("recog not defined", procName, 1);

        /* Is every class of the charset represented in the
         * unscaled bitmaps? */
    nclasses = pixaaGetCount(recog->pixaa_u, &nacounts);
    complete = (nclasses == recog->charset_size) ? 1 : 0;

        /* If so, and the smallest class already has enough samples,
         * no padding is needed and *psa stays NULL. */
    needed = recog->min_nopad;
    numaGetMin(nacounts, &mincount, NULL);
    if (complete && (mincount >= needed)) {
        numaDestroy(&nacounts);
        return 0;
    }

        /* Start the output with the strings of any classes that
         * are not represented at all */
    sapad = recogAddMissingClassStrings(recog);
    *psa = sapad;

        /* Then append the strings of the classes that are present
         * but have fewer than min_nopad samples */
    for (iclass = 0; iclass < nclasses; iclass++) {
        numaGetIValue(nacounts, iclass, &count);
        if (count >= needed)
            continue;
        label = sarrayGetString(recog->sa_text, iclass, L_COPY);
        sarrayAddString(sapad, label, L_INSERT);
    }

    numaDestroy(&nacounts);
    return 0;
}


/*!
 * \brief   recogAddMissingClassStrings()
 *
 * \param[in]    recog   trained
 * \return       sa  of class string missing in %recog, or NULL on error
 *
 *

 * Notes:
 *      (1) This returns an empty %sa if there is at least one template
 *          in each class in %recog.
 *

*/
static SARRAY  *
recogAddMissingClassStrings(L_RECOG  *recog)
{
char    *text;
char     str[4];
l_int32  i, nclass, index, ival;
NUMA    *na;
SARRAY  *sa;

    PROCNAME("recogAddMissingClassStrings");

    if (!recog)
        return (SARRAY *)ERROR_PTR("recog not defined", procName, NULL);

        /* Only handling digits */
    nclass = pixaaGetCount(recog->pixaa_u, NULL);  /* unscaled bitmaps */
    if (recog->charset_type != 1 || nclass == 10)
        return sarrayCreate(0);  /* empty */

        /* Make an indicator array for missing classes.  It has one
         * entry per member of the charset, indexed by digit value:
         * 1 means missing, 0 means present in %recog. */
    na = numaCreate(0);
    sa = sarrayCreate(0);
    for (i = 0; i < recog->charset_size; i++)
         numaAddNumber(na, 1);
    for (i = 0; i < nclass; i++) {
        text = sarrayGetString(recog->sa_text, i, L_NOCOPY);
        if (!text) continue;  /* no label to inspect for this class */
        index = text[0] - '0';
        numaSetValue(na, index, 0);
    }

        /* Convert to string and add to output.  The indicator array
         * is indexed by digit value, so all charset_size entries must
         * be scanned.  Stopping at nclass (the number of classes that
         * are present) would never report a missing digit whose value
         * is >= nclass. */
    for (i = 0; i < recog->charset_size; i++) {
        numaGetIValue(na, i, &ival);
        if (ival == 1) {
            str[0] = '0' + i;
            str[1] = '\0';
            sarrayAddString(sa, str, L_COPY);
        }
    }
    numaDestroy(&na);
    return sa;
}


/*!
 * \brief   recogAddDigitPadTemplates()
 *
 * \param[in]    recog   trained
 * \param[in]    sa      set of text strings that need to be padded
 * \return  pixa   of all templates from %recog and the additional pad
 *                 templates from a boot recognizer; or NULL on error
 *
 *

 * Notes:
 *      (1) Call recogIsPaddingNeeded() first, which returns %sa of
 *          template text strings for classes where more templates
 *          are needed.
 *

*/ PIXA * recogAddDigitPadTemplates(L_RECOG *recog, SARRAY *sa) { char *str, *text; l_int32 i, j, n, nt; PIX *pix; PIXA *pixa1, *pixa2; PROCNAME("recogAddDigitPadTemplates"); if (!recog) return (PIXA *)ERROR_PTR("recog not defined", procName, NULL); if (!sa) return (PIXA *)ERROR_PTR("sa not defined", procName, NULL); if (recogCharsetAvailable(recog->charset_type) == FALSE) return (PIXA *)ERROR_PTR("boot charset not available", procName, NULL); /* Make boot recog templates */ pixa1 = recogMakeBootDigitTemplates(0, 0); n = pixaGetCount(pixa1); /* Extract the unscaled templates from %recog */ pixa2 = recogExtractPixa(recog); /* Add selected boot recog templates based on the text strings in sa */ nt = sarrayGetCount(sa); for (i = 0; i < n; i++) { pix = pixaGetPix(pixa1, i, L_CLONE); text = pixGetText(pix); for (j = 0; j < nt; j++) { str = sarrayGetString(sa, j, L_NOCOPY); if (!strcmp(text, str)) { pixaAddPix(pixa2, pix, L_COPY); break; } } pixDestroy(&pix); } pixaDestroy(&pixa1); return pixa2; } /*! * \brief recogCharsetAvailable() * * \param[in] type of charset for padding * \return 1 if available; 0 if not. */ static l_int32 recogCharsetAvailable(l_int32 type) { l_int32 ret; PROCNAME("recogCharsetAvailable"); switch (type) { case L_ARABIC_NUMERALS: ret = TRUE; break; case L_LC_ROMAN_NUMERALS: case L_UC_ROMAN_NUMERALS: case L_LC_ALPHA: case L_UC_ALPHA: L_INFO("charset type %d not available\n", procName, type); ret = FALSE; break; default: L_INFO("charset type %d is unknown\n", procName, type); ret = FALSE; break; } return ret; } /*------------------------------------------------------------------------* * Making a boot digit recognizer * *------------------------------------------------------------------------*/ /*! * \brief recogMakeBootDigitRecog() * * \param[in] nsamp number of samples of each digit; or 0 * \param[in] scaleh scale all heights to this; typ. use 40 * \param[in] linew normalized line width; typ. 
use 5; 0 to skip * \param[in] maxyshift from nominal centroid alignment; typically 0 or 1 * \param[in] debug 1 for showing templates; 0 otherwise * \return recog, or NULL on error * *

 * Notes:
 *     (1) This takes a set of pre-computed, labeled pixa of single
 *         digits, and generates a recognizer from them.
 *         The templates used in the recognizer can be modified by:
 *         - scaling (isotropically to fixed height)
 *         - generating a skeleton and thickening so that all strokes
 *           have the same width.
 *     (2) The resulting templates are scaled versions of either the
 *         input bitmaps or images with fixed line widths.  To use the
 *         input bitmaps, set %linew = 0; otherwise, set %linew to the
 *         desired line width.
 *     (3) If %nsamp == 0, this uses and extends the output from
 *         three boot generators:
 *            l_bootnum_gen1, l_bootnum_gen2, l_bootnum_gen3.
 *         Otherwise, it uses exactly %nsamp templates of each digit,
 *         extracted by l_bootnum_gen4.
 *

*/
L_RECOG  *
recogMakeBootDigitRecog(l_int32  nsamp,
                        l_int32  scaleh,
                        l_int32  linew,
                        l_int32  maxyshift,
                        l_int32  debug)
{
PIXA     *pixatempl;
L_RECOG  *recogboot;

        /* Fetch the labeled boot templates */
    pixatempl = recogMakeBootDigitTemplates(nsamp, debug);

        /* Build the boot recog from them, passing through the
         * requested %scaleh, %linew and %maxyshift, with a fixed
         * threshold argument of 128. */
    recogboot = recogCreateFromPixa(pixatempl, 0, scaleh, linew, 128,
                                    maxyshift);
    pixaDestroy(&pixatempl);

    if (debug)
        recogShowContent(stderr, recogboot, 0, 1);

    return recogboot;
}


/*!
 * \brief   recogMakeBootDigitTemplates()
 *
 * \param[in]    nsamp     number of samples of each digit; or 0
 * \param[in]    debug     1 for display of templates
 * \return  pixa   of templates; or NULL on error
 *
 *

 * Notes:
 *     (1) See recogMakeBootDigitRecog().
 *

*/
PIXA  *
recogMakeBootDigitTemplates(l_int32  nsamp,
                            l_int32  debug)
{
NUMA  *nascales;
PIX   *pixd1, *pixd2, *pixd3;
PIXA  *pixaset1, *pixaset2, *pixaset3, *pixaext;

        /* With a positive %nsamp, the templates come from a single
         * generator and are returned without extension */
    if (nsamp > 0) {
        pixaset1 = l_bootnum_gen4(nsamp);
        if (debug) {
            pixd1 = pixaDisplayTiledWithText(pixaset1, 1500, 1.0, 10,
                                             2, 6, 0xff000000);
            pixDisplay(pixd1, 0, 0);
            pixDestroy(&pixd1);
        }
        return pixaset1;
    }

        /* Else, generate from 3 pixa */
    pixaset1 = l_bootnum_gen1();
    pixaset2 = l_bootnum_gen2();
    pixaset3 = l_bootnum_gen3();
    if (debug) {
        pixd1 = pixaDisplayTiledWithText(pixaset1, 1500, 1.0, 10,
                                         2, 6, 0xff000000);
        pixd2 = pixaDisplayTiledWithText(pixaset2, 1500, 1.0, 10,
                                         2, 6, 0xff000000);
        pixd3 = pixaDisplayTiledWithText(pixaset3, 1500, 1.0, 10,
                                         2, 6, 0xff000000);
        pixDisplay(pixd1, 0, 0);
        pixDisplay(pixd2, 600, 0);
        pixDisplay(pixd3, 1200, 0);
        pixDestroy(&pixd1);
        pixDestroy(&pixd2);
        pixDestroy(&pixd3);
    }

        /* Merge the second and third sets into the first */
    pixaJoin(pixaset1, pixaset2, 0, -1);
    pixaJoin(pixaset1, pixaset3, 0, -1);
    pixaDestroy(&pixaset2);
    pixaDestroy(&pixaset3);

        /* Extend by horizontal scaling, using factors 0.9, 1.1 and 1.2 */
    nascales = numaCreate(4);
    numaAddNumber(nascales, 0.9);
    numaAddNumber(nascales, 1.1);
    numaAddNumber(nascales, 1.2);
    pixaext = pixaExtendByScaling(pixaset1, nascales, L_HORIZ, 1);

    pixaDestroy(&pixaset1);
    numaDestroy(&nascales);
    return pixaext;
}


/*------------------------------------------------------------------------*
 *                               Debugging                                *
 *------------------------------------------------------------------------*/
/*!
* \brief recogShowContent() * * \param[in] fp file stream * \param[in] recog * \param[in] index for naming of output files of template images * \param[in] display 1 for showing template images; 0 otherwise * \return 0 if OK, 1 on error */ l_ok recogShowContent(FILE *fp, L_RECOG *recog, l_int32 index, l_int32 display) { char buf[128]; l_int32 i, val, count; PIX *pix; NUMA *na; PROCNAME("recogShowContent"); if (!fp) return ERROR_INT("stream not defined", procName, 1); if (!recog) return ERROR_INT("recog not defined", procName, 1); fprintf(fp, "Debug print of recog contents\n"); fprintf(fp, " Setsize: %d\n", recog->setsize); fprintf(fp, " Binarization threshold: %d\n", recog->threshold); fprintf(fp, " Maximum matching y-jiggle: %d\n", recog->maxyshift); if (recog->linew <= 0) fprintf(fp, " Using image templates for matching\n"); else fprintf(fp, " Using templates with fixed line width for matching\n"); if (recog->scalew == 0) fprintf(fp, " No width scaling of templates\n"); else fprintf(fp, " Template width scaled to %d\n", recog->scalew); if (recog->scaleh == 0) fprintf(fp, " No height scaling of templates\n"); else fprintf(fp, " Template height scaled to %d\n", recog->scaleh); fprintf(fp, " Number of samples in each class:\n"); pixaaGetCount(recog->pixaa_u, &na); for (i = 0; i < recog->setsize; i++) { l_dnaGetIValue(recog->dna_tochar, i, &val); numaGetIValue(na, i, &count); if (val < 128) fprintf(fp, " class %d, char %c: %d\n", i, val, count); else fprintf(fp, " class %d, val %d: %d\n", i, val, count); } numaDestroy(&na); if (display) { lept_mkdir("lept/recog"); pix = pixaaDisplayByPixa(recog->pixaa_u, 20, 20, 1000); snprintf(buf, sizeof(buf), "/tmp/lept/recog/templates_u.%d.png", index); pixWriteDebug(buf, pix, IFF_PNG); pixDisplay(pix, 0, 200 * index); pixDestroy(&pix); if (recog->train_done) { pix = pixaaDisplayByPixa(recog->pixaa, 20, 20, 1000); snprintf(buf, sizeof(buf), "/tmp/lept/recog/templates.%d.png", index); pixWriteDebug(buf, pix, IFF_PNG); 
pixDisplay(pix, 800, 200 * index); pixDestroy(&pix); } } return 0; } /*! * \brief recogDebugAverages() * * \param[in] precog addr of recog * \param[in] debug 0 no output; 1 for images; 2 for text; 3 for both * \return 0 if OK, 1 on error * *

 * Notes:
 *      (1) Generates an image that pairs each of the input images used
 *          in training with the average template that it is best
 *          correlated to.  This is written into the recog.
 *      (2) It also generates pixa_tr of all the input training images,
 *          which can be used, e.g., in recogShowMatchesInRange().
 *      (3) Destroys the recog if the averaging function finds any bad classes.
 *

*/
l_ok
recogDebugAverages(L_RECOG  **precog,
                   l_int32    debug)
{
l_int32    i, j, n, np, index;
l_float32  score;
PIX       *pix1, *pix2, *pix3;
PIXA      *pixa, *pixat;
PIXAA     *paa1, *paa2;
L_RECOG   *recog;

    PROCNAME("recogDebugAverages");

    if (!precog)
        return ERROR_INT("&recog not defined", procName, 1);
    if ((recog = *precog) == NULL)
        return ERROR_INT("recog not defined", procName, 1);

        /* Mark the training as finished if necessary, and make sure
         * that the average templates have been built.  The averaging
         * function is given the address of the local handle, so if it
         * destroys the recog only the local handle is nulled.  The
         * caller's handle must be cleared as well; otherwise it is
         * left pointing at the destroyed recog. */
    recogAverageSamples(&recog, 0);
    if (!recog) {
        *precog = NULL;
        return ERROR_INT("averaging failed; recog destroyed", procName, 1);
    }

        /* Save a pixa of all the training examples */
    paa1 = recog->pixaa;
    if (!recog->pixa_tr)
        recog->pixa_tr = pixaaFlattenToPixa(paa1, NULL, L_CLONE);

        /* Destroy any existing image and make a new one */
    if (recog->pixdb_ave)
        pixDestroy(&recog->pixdb_ave);
    n = pixaaGetCount(paa1, NULL);
    paa2 = pixaaCreate(n);
    for (i = 0; i < n; i++) {
        pixa = pixaCreate(0);
        pixat = pixaaGetPixa(paa1, i, L_CLONE);
        np = pixaGetCount(pixat);
        for (j = 0; j < np; j++) {
                /* Identify each training image against the recog and
                 * keep the returned debug image, with a border added */
            pix1 = pixaaGetPix(paa1, i, j, L_CLONE);
            recogIdentifyPix(recog, pix1, &pix2);
            rchExtract(recog->rch, &index, &score, NULL, NULL, NULL,
                       NULL, NULL);
            if (debug >= 2)
                fprintf(stderr, "index = %d, score = %7.3f\n", index, score);
            pix3 = pixAddBorder(pix2, 2, 1);
            pixaAddPix(pixa, pix3, L_INSERT);
            pixDestroy(&pix1);
            pixDestroy(&pix2);
        }
        pixaaAddPixa(paa2, pixa, L_INSERT);
        pixaDestroy(&pixat);
    }
    recog->pixdb_ave = pixaaDisplayByPixa(paa2, 20, 20, 2500);
    if (debug % 2) {  /* odd values request image output */
        lept_mkdir("lept/recog");
        pixWriteDebug("/tmp/lept/recog/templ_match.png", recog->pixdb_ave,
                      IFF_PNG);
        pixDisplay(recog->pixdb_ave, 100, 100);
    }

    pixaaDestroy(&paa2);
    return 0;
}


/*!
 * \brief   recogShowAverageTemplates()
 *
 * \param[in]    recog
 * \return  0 on success, 1 on failure
 *
 *

 * Notes:
 *      (1) This debug routine generates a display of the averaged templates,
 *          both scaled and unscaled, with the centroid visible in red.
 *

*/
l_int32
recogShowAverageTemplates(L_RECOG  *recog)
{
l_int32    k, pass, count;
l_float32  cx, cy;
PIX       *src, *rgb, *tiled, *marker;
PIXA      *tiles, *result;

    PROCNAME("recogShowAverageTemplates");

    if (!recog)
        return ERROR_INT("recog not defined", procName, 1);

        /* Report the size limits stored in the recog */
    fprintf(stderr, "min/max width_u = (%d,%d); min/max height_u = (%d,%d)\n",
            recog->minwidth_u, recog->maxwidth_u,
            recog->minheight_u, recog->maxheight_u);
    fprintf(stderr, "min splitw = %d, max splith = %d\n",
            recog->min_splitw, recog->max_splith);

        /* Discard any previous debug output */
    pixaDestroy(&recog->pixadb_ave);

        /* 3x3 red square that is painted at each centroid location */
    marker = pixCreate(3, 3, 32);
    pixSetAllArbitrary(marker, 0xff000000);
    result = pixaCreate(2);
    count = recog->setsize;

        /* Pass 0 renders the unscaled bitmaps (pixa_u, pta_u);
         * pass 1 renders the scaled bitmaps (pixa, pta). */
    for (pass = 0; pass < 2; pass++) {
        tiles = pixaCreate(count);
        for (k = 0; k < count; k++) {
            src = pixaGetPix((pass == 0) ? recog->pixa_u : recog->pixa,
                             k, L_CLONE);
            if (src == NULL)
                continue;
            rgb = pixConvertTo32(src);
            ptaGetPt((pass == 0) ? recog->pta_u : recog->pta, k, &cx, &cy);
            pixRasterop(rgb, (l_int32)(cx - 0.5), (l_int32)(cy - 0.5),
                        3, 3, PIX_SRC, marker, 0, 0);
            pixaAddPix(tiles, rgb, L_INSERT);
            pixDestroy(&src);
        }

            /* The tiled image is owned by the result pixa; it is
             * displayed here while that pixa is still alive. */
        tiled = pixaDisplayTiledInRows(tiles, 32, 3000, 1.0, 0, 20, 0);
        pixaAddPix(result, tiled, L_INSERT);
        pixDisplay(tiled, 100, 100);
        pixaDestroy(&tiles);
    }

    pixDestroy(&marker);
    recog->pixadb_ave = result;
    return 0;
}


/*!
 * \brief   pixDisplayOutliers()
 *
 * \param[in]    pixas        unscaled labeled templates
 * \param[in]    nas          scores of templates (against class averages)
 * \return  pix    tiled pixa with text and scores, or NULL on failure
 *
 *

 * Notes:
 *      (1) This debug routine is called from recogRemoveOutliers2(),
 *          and takes the saved templates and their scores as input.
 *

*/
static PIX  *
pixDisplayOutliers(PIXA  *pixas,
                   NUMA  *nas)
{
char      *text;
char       buf[64];  /* room for a multi-byte label plus the score */
l_int32    i, n;
l_float32  fval;
PIX       *pix1, *pix2;
PIXA      *pixa1;

    PROCNAME("pixDisplayOutliers");

    if (!pixas)
        return (PIX *)ERROR_PTR("pixas not defined", procName, NULL);
    if (!nas)
        return (PIX *)ERROR_PTR("nas not defined", procName, NULL);
    n = pixaGetCount(pixas);
    if (numaGetCount(nas) != n)
        return (PIX *)ERROR_PTR("pixas and nas sizes differ", procName, NULL);

        /* For each template, make a bordered copy whose text field
         * holds the template's label and its score. */
    pixa1 = pixaCreate(n);
    for (i = 0; i < n; i++) {
        pix1 = pixaGetPix(pixas, i, L_CLONE);
        pix2 = pixAddBlackOrWhiteBorder(pix1, 25, 25, 0, 0, L_GET_WHITE_VAL);
        text = pixGetText(pix1);
        numaGetFValue(nas, i, &fval);
            /* Passing a NULL pointer for %s is undefined; substitute
             * an empty label if the template has no text. */
        snprintf(buf, sizeof(buf), "'%s': %5.2f", text ? text : "", fval);
        pixSetText(pix2, buf);
        pixaAddPix(pixa1, pix2, L_INSERT);
        pixDestroy(&pix1);
    }

        /* Tile the images, rendering each one's text field with it */
    pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 20, 2, 6, 0xff000000);
    pixaDestroy(&pixa1);
    return pix1;
}


/*!
 * \brief   recogDisplayOutlier()
 *
 * \param[in]    recog
 * \param[in]    iclass       sample is in this class
 * \param[in]    jsamp        index of sample in class %iclass
 * \param[in]    maxclass     index of class with closest average to sample
 * \param[in]    maxscore     score of sample with average of class %maxclass
 * \return  pix    sample and template images, with score, or NULL on error
 *
 *

 * Notes:
 *      (1) This shows three templates, side-by-side:
 *          - The outlier sample
 *          - The average template from the same class
 *          - The average class template that best matched the outlier sample
 *

*/
static PIX  *
recogDisplayOutlier(L_RECOG   *recog,
                    l_int32    iclass,
                    l_int32    jsamp,
                    l_int32    maxclass,
                    l_float32  maxscore)
{
char   label[64];
PIX   *sample, *ownave, *bestave, *tiled, *labeled;
PIXA  *triple;

    PROCNAME("recogDisplayOutlier");

    if (recog == NULL)
        return (PIX *)ERROR_PTR("recog not defined", procName, NULL);

        /* Caption: class of the sample, best-matching average class,
         * and the score against that class. */
    snprintf(label, sizeof(label), "C=%d, BAC=%d, S=%4.2f",
             iclass, maxclass, maxscore);

        /* The three images, in display order: the sample itself, the
         * average of its own class, and the average of class maxclass. */
    sample = pixaaGetPix(recog->pixaa, iclass, jsamp, L_CLONE);
    ownave = pixaGetPix(recog->pixa, iclass, L_CLONE);
    bestave = pixaGetPix(recog->pixa, maxclass, L_CLONE);
    triple = pixaCreate(3);
    pixaAddPix(triple, sample, L_INSERT);
    pixaAddPix(triple, ownave, L_INSERT);
    pixaAddPix(triple, bestave, L_INSERT);

        /* Tile them side by side and put the caption underneath */
    tiled = pixaDisplayTiledInRows(triple, 32, 400, 2.0, 0, 12, 2);
    labeled = pixAddSingleTextblock(tiled, recog->bmf, label, 0xff000000,
                                    L_ADD_BELOW, NULL);

    pixaDestroy(&triple);
    pixDestroy(&tiled);
    return labeled;
}


/*!
 * \brief   recogShowMatchesInRange()
 *
 * \param[in]    recog
 * \param[in]    pixa        of 1 bpp images to match
 * \param[in]    minscore    min score to include output
 * \param[in]    maxscore    max score to include output
 * \param[in]    display     1 to display the result
 * \return  0 if OK, 1 on error
 *
 *

 * Notes:
 *      (1) This gives a visual output of the best matches for a given
 *          range of scores.  Each pair of images can optionally be
 *          labeled with the index of the best match and the correlation.
 *      (2) To use this, save a set of 1 bpp images (labeled or
 *          unlabeled) that can be given to a recognizer in a pixa.
 *          Then call this function with the pixa and parameters
 *          to filter a range of scores.
 *

*/
l_ok
recogShowMatchesInRange(L_RECOG     *recog,
                        PIXA        *pixa,
                        l_float32    minscore,
                        l_float32    maxscore,
                        l_int32      display)
{
l_int32    i, n, index, depth, havedepth;
l_float32  score;
NUMA      *nascore, *naindex;
PIX       *pix1, *pix2;
PIXA      *pixa1, *pixa2;

    PROCNAME("recogShowMatchesInRange");

    if (!recog)
        return ERROR_INT("recog not defined", procName, 1);
    if (!pixa)
        return ERROR_INT("pixa not defined", procName, 1);

        /* Run the recognizer on the set of images, saving the best
         * score and index for each, along with the debug image. */
    n = pixaGetCount(pixa);
    nascore = numaCreate(n);
    naindex = numaCreate(n);
    pixa1 = pixaCreate(n);
    for (i = 0; i < n; i++) {
        pix1 = pixaGetPix(pixa, i, L_CLONE);
        recogIdentifyPix(recog, pix1, &pix2);
        rchExtract(recog->rch, &index, &score, NULL, NULL, NULL, NULL, NULL);
        numaAddNumber(nascore, score);
        numaAddNumber(naindex, index);
        pixaAddPix(pixa1, pix2, L_INSERT);
        pixDestroy(&pix1);
    }

        /* Filter the set and optionally add text to each.  The output
         * depth is taken from the first image that is actually kept,
         * not from image 0, which may be outside the score range. */
    pixa2 = pixaCreate(n);
    depth = 1;
    havedepth = 0;
    for (i = 0; i < n; i++) {
        numaGetFValue(nascore, i, &score);
        if (score < minscore || score > maxscore) continue;
        pix1 = pixaGetPix(pixa1, i, L_CLONE);
        numaGetIValue(naindex, i, &index);
        pix2 = recogShowMatch(recog, pix1, NULL, NULL, index, score);
        if (pix2 && !havedepth) {
            depth = pixGetDepth(pix2);
            havedepth = 1;
        }
        pixaAddPix(pixa2, pix2, L_INSERT);
        pixDestroy(&pix1);
    }

        /* Package it up, replacing any previous result */
    pixDestroy(&recog->pixdb_range);
    if (pixaGetCount(pixa2) > 0) {
        recog->pixdb_range =
            pixaDisplayTiledInRows(pixa2, depth, 2500, 1.0, 0, 20, 1);
        if (display)
            pixDisplay(recog->pixdb_range, 300, 100);
    } else {
        L_INFO("no character matches in the range of scores\n", procName);
    }

    pixaDestroy(&pixa1);
    pixaDestroy(&pixa2);
    numaDestroy(&nascore);
    numaDestroy(&naindex);
    return 0;
}


/*!
 * \brief   recogShowMatch()
 *
 * \param[in]    recog
 * \param[in]    pix1    input pix; several possibilities
 * \param[in]    pix2    [optional] matching template
 * \param[in]    box     [optional] region in pix1 for which pix2 matches
 * \param[in]    index   index of matching template; use -1 to disable printing
 * \param[in]    score   score of match
 * \return  pixd pair of images, showing input pix and best template,
 *               optionally with matching information, or NULL on error.
 *
 *

 * Notes:
 *      (1) pix1 can be one of these:
 *          (a) The input pix alone, which can be either a single character
 *              (box == NULL) or several characters that need to be
 *              segmented.  If more than one character is present, the box
 *              region is displayed with an outline.
 *          (b) Both the input pix and the matching template.  In this case,
 *              pix2 and box will both be null.
 *      (2) If the bmf has been made (by a call to recogMakeBmf())
 *          and the index >= 0, the text field, match score and index
 *          will be rendered; otherwise their values will be ignored.
 *

*/
PIX  *
recogShowMatch(L_RECOG   *recog,
               PIX       *pix1,
               PIX       *pix2,
               BOX       *box,
               l_int32    index,
               l_float32  score)
{
char    caption[32];
char   *classstr;
L_BMF  *font;
PIX    *rgb, *pair, *padded, *result;
PIXA   *both;

    PROCNAME("recogShowMatch");

    if (recog == NULL)
        return (PIX *)ERROR_PTR("recog not defined", procName, NULL);
    if (pix1 == NULL)
        return (PIX *)ERROR_PTR("pix1 not defined", procName, NULL);

        /* Text is rendered only when a font exists and index >= 0 */
    font = NULL;
    if (recog->bmf && index >= 0)
        font = recog->bmf;

        /* With no template, no box and no text there is nothing to
         * add; hand back a copy of the input. */
    if (!(pix2 || box || font))
        return pixCopy(NULL, pix1);

        /* Work on a 32 bpp version so the box can be outlined in red */
    rgb = pixConvertTo32(pix1);
    if (box)
        pixRenderBoxArb(rgb, box, 1, 255, 0, 0);

        /* Place the template next to the input, if one was given */
    if (pix2 == NULL) {
        pair = pixCopy(NULL, rgb);
    } else {
        both = pixaCreate(2);
        pixaAddPix(both, rgb, L_CLONE);
        pixaAddPix(both, pix2, L_CLONE);
        pair = pixaDisplayTiledInRows(both, 1, 500, 1.0, 0, 15, 0);
        pixaDestroy(&both);
    }
    pixDestroy(&rgb);

        /* Without a font, the image pair is the result */
    if (font == NULL) {
        result = pixClone(pair);
        pixDestroy(&pair);
        return result;
    }

        /* Otherwise widen the image with side borders and add a line
         * of text below giving the class string, score and index. */
    padded = pixAddBorderGeneral(pair, 55, 55, 0, 0, 0xffffff00);
    recogGetClassString(recog, index, &classstr);
    snprintf(caption, sizeof(caption), "C=%s, S=%4.3f, I=%d",
             classstr, score, index);
    result = pixAddSingleTextblock(padded, font, caption, 0xff000000,
                                   L_ADD_BELOW, NULL);
    pixDestroy(&padded);
    LEPT_FREE(classstr);
    pixDestroy(&pair);
    return result;
}