/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *====================================================================*/ /*! * \file recogtrain.c *
* * Training on labeled data * l_int32 recogTrainLabeled() * PIX *recogProcessLabeled() * l_int32 recogAddSample() * PIX *recogModifyTemplate() * l_int32 recogAverageSamples() * l_int32 pixaAccumulateSamples() * l_int32 recogTrainingFinished() * static l_int32 recogTemplatesAreOK() * PIXA *recogFilterPixaBySize() * PIXAA *recogSortPixaByClass() * l_int32 recogRemoveOutliers1() * PIXA *pixaRemoveOutliers1() * l_int32 recogRemoveOutliers2() * PIXA *pixaRemoveOutliers2() * * Training on unlabeled data * L_RECOG recogTrainFromBoot() * * Padding the digit training set * l_int32 recogPadDigitTrainingSet() * l_int32 recogIsPaddingNeeded() * static SARRAY *recogAddMissingClassStrings() * PIXA *recogAddDigitPadTemplates() * static l_int32 recogCharsetAvailable() * * Making a boot digit recognizer * L_RECOG *recogMakeBootDigitRecog() * PIXA *recogMakeBootDigitTemplates() * * Debugging * l_int32 recogShowContent() * l_int32 recogDebugAverages() * l_int32 recogShowAverageTemplates() * static PIX *pixDisplayOutliers() * PIX *recogDisplayOutlier() * PIX *recogShowMatchesInRange() * PIX *recogShowMatch() * * These abbreviations are for the type of template to be used: * * SI (for the scanned images) * * WNL (for width-normalized lines, formed by first skeletonizing * the scanned images, and then dilating to a fixed width) * These abbreviations are for the type of recognizer: * * BAR (book-adapted recognizer; the best type; can do identification * with unscaled images and separation of touching characters. * * BSR (bootstrap recognizer; used if more labeled templates are * required for a BAR, either for finding more templates from * the book, or making a hybrid BAR/BSR. * * The recog struct typically holds two versions of the input templates * (e.g. from a pixa) that were used to generate it. One version is * the unscaled input templates. The other version is the one that * will be used by the recog to identify unlabeled data. That version * depends on the input parameters when the recog is created. The choices * for the latter version, and their suggested use, are: * (1) unscaled SI -- typical for BAR, generated from book images * (2) unscaled WNL -- ditto * (3) scaled SI -- typical for recognizers containing template * images from sources other than the book to be recognized * (4) scaled WNL -- ditto * For cases (3) and (4), we recommend scaling to fixed height; e.g., * scalew = 0, scaleh = 40. * When using WNL, we recommend using a width of 5 in the template * and 4 in the unlabeled data. * It appears that better results for a BAR are usually obtained using * SI than WNL, but more experimentation is needed. * * This utility is designed to build recognizers that are specifically * adapted from a large amount of material, such as a book. These * use labeled templates taken from the material, and not scaled. * In addition, two special recognizers are useful: * (1) Bootstrap recognizer (BSR). This uses height-scaled templates, * that have been extended with several repetitions in one of two ways: * (a) aniotropic width scaling (for either SI or WNL) * (b) iterative erosions/dilations (for SI). * (2) Outlier removal. This uses height scaled templates. It can be * implemented without using templates that are aligned averages of all * templates in a class. * * Recognizers are inexpensive to generate, for example, from a pixa * of labeled templates. The general process of building a BAR is * to start with labeled templates, e.g., in a pixa, make a BAR, and * analyze new samples from the book to augment the BAR until it has * enough samples for each character class. Along the way, samples * from a BSR may be added for help in training. If not enough samples * are available for the BAR, it can finally be augmented with BSR * samples, in which case the resulting hybrid BAR/BSR recognizer * must work on scaled images. * * Here are the steps in doing recog training: * A. Generate a BAR from any existing labeled templates * (1) Create a recog and add the templates, using recogAddSample(). * This stores the unscaled templates. * [Note: this can be done in one step if the labeled templates are put * into a pixa: * L_Recog *rec = recogCreateFromPixa(pixa, ...); ] * (2) Call recogTrainingFinished() to generate the (sometimes modified) * templates to be used for correlation. * (3) Optionally, remove outliers. * If there are sufficient samples in the classes, we're done. Otherwise, * B. Try to get more samples from the book to pad the BAR. * (1) Save the unscaled, labeled templates from the BAR. * (2) Supplement the BAR with bootstrap templates to make a hybrid BAR/BSR. * (3) Do recognition on more unlabeled images, scaled to a fixed height * (4) Add the unscaled, labeled images to the saved set. * (5) Optionally, remove outliers. * If there are sufficient samples in the classes, we're done. Otherwise, * C. For classes without a sufficient number of templates, we can * supplement the BAR with templates from a BSR (a hybrid RAR/BSR), * and do recognition scaled to a fixed height. * * Here are several methods that can be used for identifying outliers: * (1) Compute average templates for each class and remove a candidate * that is poorly correlated with the average. This is the most * simple method. recogRemoveOutliers1() uses this, supplemented with * a second threshold and a target number of templates to be saved. * (2) Compute average templates for each class and remove a candidate * that is more highly correlated with the average of some other class. * This does not require setting a threshold for the correlation. * recogRemoveOutliers2() uses this method, supplemented with a minimum * correlation score. * (3) For each candidate, find the average correlation with other * members of its class, and remove those that have a relatively * low average correlation. This is similar to (1), gives comparable * results and because it does not use average templates, it requires * a bit more computation. **/ #include
* Notes: * (1) Training is restricted to the addition of a single * character in an arbitrary (e.g., UTF8) charset * (2) If box != null, it should represent the location in %pixs * of the character image. **/ l_ok recogTrainLabeled(L_RECOG *recog, PIX *pixs, BOX *box, char *text, l_int32 debug) { l_int32 ret; PIX *pix; PROCNAME("recogTrainLabeled"); if (!recog) return ERROR_INT("recog not defined", procName, 1); if (!pixs) return ERROR_INT("pixs not defined", procName, 1); /* Prepare the sample to be added. This step also acts * as a filter, and can invalidate pixs as a template. */ ret = recogProcessLabeled(recog, pixs, box, text, &pix); if (ret) { pixDestroy(&pix); L_WARNING("failure to get sample '%s' for training\n", procName, text); return 1; } recogAddSample(recog, pix, debug); pixDestroy(&pix); return 0; } /*! * \brief recogProcessLabeled() * * \param[in] recog in training mode * \param[in] pixs if depth > 1, will be thresholded to 1 bpp * \param[in] box [optional] cropping box * \param[in] text [optional] if null, use text field in pix * \param[out] ppix addr of pix, 1 bpp, labeled * \return 0 if OK, 1 on error * *
* Notes: * (1) This crops and binarizes the input image, generating a pix * of one character where the charval is inserted into the pix. **/ l_ok recogProcessLabeled(L_RECOG *recog, PIX *pixs, BOX *box, char *text, PIX **ppix) { char *textdata; l_int32 textinpix, textin, nsets; NUMA *na; PIX *pix1, *pix2, *pix3, *pix4; PROCNAME("recogProcessLabeled"); if (!ppix) return ERROR_INT("&pix not defined", procName, 1); *ppix = NULL; if (!recog) return ERROR_INT("recog not defined", procName, 1); if (!pixs) return ERROR_INT("pixs not defined", procName, 1); /* Find the text; this will be stored with the output images */ textin = text && (text[0] != '\0'); textinpix = (pixs->text && (pixs->text[0] != '\0')); if (!textin && !textinpix) { L_ERROR("no text: %d\n", procName, recog->num_samples); return 1; } textdata = (textin) ? text : pixs->text; /* do not free */ /* Crop and binarize if necessary */ if (box) pix1 = pixClipRectangle(pixs, box, NULL); else pix1 = pixClone(pixs); if (pixGetDepth(pix1) > 1) pix2 = pixConvertTo1(pix1, recog->threshold); else pix2 = pixClone(pix1); pixDestroy(&pix1); /* Remove isolated noise, using as a criterion all components * that are removed by a vertical opening of size 5. */ pix3 = pixMorphSequence(pix2, "o1.5", 0); /* seed */ pixSeedfillBinary(pix3, pix3, pix2, 8); /* fill from seed; clip to pix2 */ pixDestroy(&pix2); /* Clip to foreground */ pixClipToForeground(pix3, &pix4, NULL); pixDestroy(&pix3); if (!pix4) return ERROR_INT("pix4 is empty", procName, 1); /* Verify that if there is more than 1 c.c., they all have * horizontal overlap */ na = pixCountByColumn(pix4, NULL); numaCountNonzeroRuns(na, &nsets); numaDestroy(&na); if (nsets > 1) { L_WARNING("found %d sets of horiz separated c.c.; skipping\n", procName, nsets); pixDestroy(&pix4); return 1; } pixSetText(pix4, textdata); *ppix = pix4; return 0; } /*! * \brief recogAddSample() * * \param[in] recog * \param[in] pix a single character, 1 bpp * \param[in] debug * \return 0 if OK, 1 on error * *
* Notes: * (1) The pix is 1 bpp, with the character string label embedded. * (2) The pixaa_u array of the recog is initialized to accept * up to 256 different classes. When training is finished, * the arrays are truncated to the actual number of classes. * To pad an existing recog from the boot recognizers, training * is started again; if samples from a new class are added, * the pixaa_u array is extended by adding a pixa to hold them. **/ l_ok recogAddSample(L_RECOG *recog, PIX *pix, l_int32 debug) { char *text; l_int32 npa, charint, index; PIXA *pixa1; PIXAA *paa; PROCNAME("recogAddSample"); if (!recog) return ERROR_INT("recog not defined", procName, 1); if (!pix || pixGetDepth(pix) != 1) return ERROR_INT("pix not defined or not 1 bpp\n", procName, 1); if (recog->train_done) return ERROR_INT("not added: training has been completed", procName, 1); paa = recog->pixaa_u; /* Make sure the character is in the set */ text = pixGetText(pix); if (l_convertCharstrToInt(text, &charint) == 1) { L_ERROR("invalid text: %s\n", procName, text); return 1; } /* Determine the class array index. Check if the class * alreadly exists, and if not, add it. */ if (recogGetClassIndex(recog, charint, text, &index) == 1) { /* New class must be added */ npa = pixaaGetCount(paa, NULL); if (index > npa) { L_ERROR("oops: bad index %d > npa %d!!\n", procName, index, npa); return 1; } if (index == npa) { /* paa needs to be extended */ L_INFO("Adding new class and pixa: index = %d, text = %s\n", procName, index, text); pixa1 = pixaCreate(10); pixaaAddPixa(paa, pixa1, L_INSERT); } } if (debug) { L_INFO("Identified text label: %s\n", procName, text); L_INFO("Identified: charint = %d, index = %d\n", procName, charint, index); } /* Insert the unscaled character image into the right pixa. * (Unscaled images are required to split touching characters.) */ recog->num_samples++; pixaaAddPix(paa, index, pix, NULL, L_COPY); return 0; } /*! * \brief recogModifyTemplate() * * \param[in] recog * \param[in] pixs 1 bpp, to be optionally scaled and turned into * strokes of fixed width * \return pixd modified pix if OK, NULL on error */ PIX * recogModifyTemplate(L_RECOG *recog, PIX *pixs) { l_int32 w, h, empty; PIX *pix1, *pix2; PROCNAME("recogModifyTemplate"); if (!recog) return (PIX *)ERROR_PTR("recog not defined", procName, NULL); if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); /* Scale first */ pixGetDimensions(pixs, &w, &h, NULL); if ((recog->scalew == 0 || recog->scalew == w) && (recog->scaleh == 0 || recog->scaleh == h)) { /* no scaling */ pix1 = pixCopy(NULL, pixs); } else { pix1 = pixScaleToSize(pixs, recog->scalew, recog->scaleh); } if (!pix1) return (PIX *)ERROR_PTR("pix1 not made", procName, NULL); /* Then optionally convert to lines */ if (recog->linew <= 0) { pix2 = pixClone(pix1); } else { pix2 = pixSetStrokeWidth(pix1, recog->linew, 1, 8); } pixDestroy(&pix1); if (!pix2) return (PIX *)ERROR_PTR("pix2 not made", procName, NULL); /* Make sure we still have some pixels */ pixZero(pix2, &empty); if (empty) { pixDestroy(&pix2); return (PIX *)ERROR_PTR("modified template has no pixels", procName, NULL); } return pix2; } /*! * \brief recogAverageSamples() * * \param[in] precog addr of existing recog; may be destroyed * \param[in] debug * \return 0 on success, 1 on failure * *
* Notes: * (1) This is only called in two situations: * (a) When splitting characters using either the DID method * recogDecode() or the the greedy splitter * recogCorrelationBestRow() * (b) By a special recognizer that is used to remove outliers. * Both unscaled and scaled inputs are averaged. * (2) If the data in any class is nonexistent (no samples), or * very bad (no fg pixels in the average), or if the ratio * of max/min average unscaled class template heights is * greater than max_ht_ratio, this destroys the recog. * The caller must check the return value of the recog. * (3) Set debug = 1 to view the resulting templates and their centroids. **/ l_int32 recogAverageSamples(L_RECOG **precog, l_int32 debug) { l_int32 i, nsamp, size, area, bx, by, badclass; l_float32 x, y, hratio; BOX *box; PIXA *pixa1; PIX *pix1, *pix2, *pix3; PTA *pta1; L_RECOG *recog; PROCNAME("recogAverageSamples"); if (!precog) return ERROR_INT("&recog not defined", procName, 1); if ((recog = *precog) == NULL) return ERROR_INT("recog not defined", procName, 1); if (recog->ave_done) { if (debug) /* always do this if requested */ recogShowAverageTemplates(recog); return 0; } /* Remove any previous averaging data */ size = recog->setsize; pixaDestroy(&recog->pixa_u); ptaDestroy(&recog->pta_u); numaDestroy(&recog->nasum_u); recog->pixa_u = pixaCreate(size); recog->pta_u = ptaCreate(size); recog->nasum_u = numaCreate(size); pixaDestroy(&recog->pixa); ptaDestroy(&recog->pta); numaDestroy(&recog->nasum); recog->pixa = pixaCreate(size); recog->pta = ptaCreate(size); recog->nasum = numaCreate(size); /* Unscaled bitmaps: compute averaged bitmap, centroid, and fg area. * Note that when we threshold to 1 bpp the 8 bpp averaged template * that is returned from the accumulator, it will not be cropped * to the foreground. We must crop it, because the correlator * makes that assumption and will return a zero value if the * width or height of the two images differs by several pixels. * But cropping to fg can cause the value of the centroid to * change, if bx > 0 or by > 0. */ badclass = FALSE; for (i = 0; i < size; i++) { pixa1 = pixaaGetPixa(recog->pixaa_u, i, L_CLONE); pta1 = ptaaGetPta(recog->ptaa_u, i, L_CLONE); nsamp = pixaGetCount(pixa1); nsamp = L_MIN(nsamp, 256); /* we only use the first 256 */ if (nsamp == 0) { /* no information for this class */ L_ERROR("no samples in class %d\n", procName, i); badclass = TRUE; pixaDestroy(&pixa1); ptaDestroy(&pta1); break; } else { pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y); pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2)); pixInvert(pix2, pix2); pixClipToForeground(pix2, &pix3, &box); if (!box) { L_ERROR("no fg pixels in average for uclass %d\n", procName, i); badclass = TRUE; pixDestroy(&pix1); pixDestroy(&pix2); pixaDestroy(&pixa1); ptaDestroy(&pta1); break; } else { boxGetGeometry(box, &bx, &by, NULL, NULL); pixaAddPix(recog->pixa_u, pix3, L_INSERT); ptaAddPt(recog->pta_u, x - bx, y - by); /* correct centroid */ pixCountPixels(pix3, &area, recog->sumtab); numaAddNumber(recog->nasum_u, area); /* foreground */ boxDestroy(&box); } pixDestroy(&pix1); pixDestroy(&pix2); } pixaDestroy(&pixa1); ptaDestroy(&pta1); } /* Are any classes bad? If so, destroy the recog and return an error */ if (badclass) { recogDestroy(precog); return ERROR_INT("at least 1 bad class; destroying recog", procName, 1); } /* Get the range of sizes of the unscaled average templates. * Reject if the height ratio is too large. */ pixaSizeRange(recog->pixa_u, &recog->minwidth_u, &recog->minheight_u, &recog->maxwidth_u, &recog->maxheight_u); hratio = (l_float32)recog->maxheight_u / (l_float32)recog->minheight_u; if (hratio > recog->max_ht_ratio) { L_ERROR("ratio of max/min height of average templates = %4.1f;" " destroying recog\n", procName, hratio); recogDestroy(precog); return 1; } /* Scaled bitmaps: compute averaged bitmap, centroid, and fg area */ for (i = 0; i < size; i++) { pixa1 = pixaaGetPixa(recog->pixaa, i, L_CLONE); pta1 = ptaaGetPta(recog->ptaa, i, L_CLONE); nsamp = pixaGetCount(pixa1); nsamp = L_MIN(nsamp, 256); /* we only use the first 256 */ pixaAccumulateSamples(pixa1, pta1, &pix1, &x, &y); pix2 = pixThresholdToBinary(pix1, L_MAX(1, nsamp / 2)); pixInvert(pix2, pix2); pixClipToForeground(pix2, &pix3, &box); if (!box) { L_ERROR("no fg pixels in average for sclass %d\n", procName, i); badclass = TRUE; pixDestroy(&pix1); pixDestroy(&pix2); pixaDestroy(&pixa1); ptaDestroy(&pta1); break; } else { boxGetGeometry(box, &bx, &by, NULL, NULL); pixaAddPix(recog->pixa, pix3, L_INSERT); ptaAddPt(recog->pta, x - bx, y - by); /* correct centroid */ pixCountPixels(pix3, &area, recog->sumtab); numaAddNumber(recog->nasum, area); /* foreground */ boxDestroy(&box); } pixDestroy(&pix1); pixDestroy(&pix2); pixaDestroy(&pixa1); ptaDestroy(&pta1); } if (badclass) { recogDestroy(precog); return ERROR_INT("at least 1 bad class; destroying recog", procName, 1); } /* Get the range of widths of the scaled average templates */ pixaSizeRange(recog->pixa, &recog->minwidth, NULL, &recog->maxwidth, NULL); /* Get dimensions useful for splitting */ recog->min_splitw = L_MAX(5, recog->minwidth_u - 5); recog->max_splith = recog->maxheight_u + 12; /* allow for skew */ if (debug) recogShowAverageTemplates(recog); recog->ave_done = TRUE; return 0; } /*! * \brief pixaAccumulateSamples() * * \param[in] pixa of samples from the same class, 1 bpp * \param[in] pta [optional] of centroids of the samples * \param[out] ppixd accumulated samples, 8 bpp * \param[out] px [optional] average x coordinate of centroids * \param[out] py [optional] average y coordinate of centroids * \return 0 on success, 1 on failure * *
* Notes: * (1) This generates an aligned (by centroid) sum of the input pix. * (2) We use only the first 256 samples; that's plenty. * (3) If pta is not input, we generate two tables, and discard * after use. If this is called many times, it is better * to precompute the pta. **/ l_int32 pixaAccumulateSamples(PIXA *pixa, PTA *pta, PIX **ppixd, l_float32 *px, l_float32 *py) { l_int32 i, n, maxw, maxh, xdiff, ydiff; l_int32 *centtab, *sumtab; l_float32 xc, yc, xave, yave; PIX *pix1, *pix2, *pixsum; PTA *ptac; PROCNAME("pixaAccumulateSamples"); if (px) *px = 0; if (py) *py = 0; if (!ppixd) return ERROR_INT("&pixd not defined", procName, 1); *ppixd = NULL; if (!pixa) return ERROR_INT("pixa not defined", procName, 1); n = pixaGetCount(pixa); if (pta && ptaGetCount(pta) != n) return ERROR_INT("pta count differs from pixa count", procName, 1); n = L_MIN(n, 256); /* take the first 256 only */ if (n == 0) return ERROR_INT("pixa array empty", procName, 1); /* Find the centroids */ if (pta) { ptac = ptaClone(pta); } else { /* generate them here */ ptac = ptaCreate(n); centtab = makePixelCentroidTab8(); sumtab = makePixelSumTab8(); for (i = 0; i < n; i++) { pix1 = pixaGetPix(pixa, i, L_CLONE); pixCentroid(pix1, centtab, sumtab, &xc, &yc); ptaAddPt(ptac, xc, yc); } LEPT_FREE(centtab); LEPT_FREE(sumtab); } /* Find the average value of the centroids */ xave = yave = 0; for (i = 0; i < n; i++) { ptaGetPt(pta, i, &xc, &yc); xave += xc; yave += yc; } xave = xave / (l_float32)n; yave = yave / (l_float32)n; if (px) *px = xave; if (py) *py = yave; /* Place all pix with their centroids located at the average * centroid value, and sum the results. Make the accumulator * image slightly larger than the largest sample to insure * that all pixels are represented in the accumulator. */ pixaSizeRange(pixa, NULL, NULL, &maxw, &maxh); pixsum = pixInitAccumulate(maxw + 5, maxh + 5, 0); pix1 = pixCreate(maxw, maxh, 1); for (i = 0; i < n; i++) { pix2 = pixaGetPix(pixa, i, L_CLONE); ptaGetPt(ptac, i, &xc, &yc); xdiff = (l_int32)(xave - xc); ydiff = (l_int32)(yave - yc); pixClearAll(pix1); pixRasterop(pix1, xdiff, ydiff, maxw, maxh, PIX_SRC, pix2, 0, 0); pixAccumulate(pixsum, pix1, L_ARITH_ADD); pixDestroy(&pix2); } *ppixd = pixFinalAccumulate(pixsum, 0, 8); pixDestroy(&pix1); pixDestroy(&pixsum); ptaDestroy(&ptac); return 0; } /*! * \brief recogTrainingFinished() * * \param[in] precog addr of recog * \param[in] modifyflag 1 to use recogModifyTemplate(); 0 otherwise * \param[in] minsize set to -1 for default * \param[in] minfract set to -1.0 for default * \return 0 if OK, 1 on error (input recog will be destroyed) * *
* Notes: * (1) This must be called after all training samples have been added. * (2) If the templates are not good enough, the recog input is destroyed. * (3) Usually, %modifyflag == 1, because we want to apply * recogModifyTemplate() to generate the actual templates * that will be used. The one exception is when reading a * serialized recog: there we want to put the same set of * templates in both the unscaled and modified pixaa. * See recogReadStream() to see why we do this. * (4) See recogTemplatesAreOK() for %minsize and %minfract usage. * (5) The following things are done here: * (a) Allocate (or reallocate) storage for (possibly) modified * bitmaps, centroids, and fg areas. * (b) Generate the (possibly) modified bitmaps. * (c) Compute centroid and fg area data for both unscaled and * modified bitmaps. * (d) Truncate the pixaa, ptaa and numaa arrays down from * 256 to the actual size. * (6) Putting these operations here makes it simple to recompute * the recog with different modifications on the bitmaps. * (7) Call recogShowContent() to display the templates, both * unscaled and modified. **/ l_ok recogTrainingFinished(L_RECOG **precog, l_int32 modifyflag, l_int32 minsize, l_float32 minfract) { l_int32 ok, i, j, size, nc, ns, area; l_float32 xave, yave; PIX *pix, *pixd; PIXA *pixa; PIXAA *paa; PTA *pta; PTAA *ptaa; L_RECOG *recog; PROCNAME("recogTrainingFinished"); if (!precog) return ERROR_INT("&recog not defined", procName, 1); if ((recog = *precog) == NULL) return ERROR_INT("recog not defined", procName, 1); if (recog->train_done) return 0; /* Test the input templates */ recogTemplatesAreOK(recog, minsize, minfract, &ok); if (!ok) { recogDestroy(precog); return ERROR_INT("bad templates", procName, 1); } /* Generate the storage for the possibly-scaled training bitmaps */ size = recog->maxarraysize; paa = pixaaCreate(size); pixa = pixaCreate(1); pixaaInitFull(paa, pixa); pixaDestroy(&pixa); pixaaDestroy(&recog->pixaa); recog->pixaa = paa; /* Generate the storage for the unscaled centroid training data */ ptaa = ptaaCreate(size); pta = ptaCreate(0); ptaaInitFull(ptaa, pta); ptaaDestroy(&recog->ptaa_u); recog->ptaa_u = ptaa; /* Generate the storage for the possibly-scaled centroid data */ ptaa = ptaaCreate(size); ptaaInitFull(ptaa, pta); ptaDestroy(&pta); ptaaDestroy(&recog->ptaa); recog->ptaa = ptaa; /* Generate the storage for the fg area data */ numaaDestroy(&recog->naasum_u); numaaDestroy(&recog->naasum); recog->naasum_u = numaaCreateFull(size, 0); recog->naasum = numaaCreateFull(size, 0); paa = recog->pixaa_u; nc = recog->setsize; for (i = 0; i < nc; i++) { pixa = pixaaGetPixa(paa, i, L_CLONE); ns = pixaGetCount(pixa); for (j = 0; j < ns; j++) { /* Save centroid and area data for the unscaled pix */ pix = pixaGetPix(pixa, j, L_CLONE); pixCentroid(pix, recog->centtab, recog->sumtab, &xave, &yave); ptaaAddPt(recog->ptaa_u, i, xave, yave); pixCountPixels(pix, &area, recog->sumtab); numaaAddNumber(recog->naasum_u, i, area); /* foreground */ /* Insert the (optionally) scaled character image, and * save centroid and area data for it */ if (modifyflag == 1) pixd = recogModifyTemplate(recog, pix); else pixd = pixClone(pix); if (pixd) { pixaaAddPix(recog->pixaa, i, pixd, NULL, L_INSERT); pixCentroid(pixd, recog->centtab, recog->sumtab, &xave, &yave); ptaaAddPt(recog->ptaa, i, xave, yave); pixCountPixels(pixd, &area, recog->sumtab); numaaAddNumber(recog->naasum, i, area); } else { L_ERROR("failed: modified template for class %d, sample %d\n", procName, i, j); } pixDestroy(&pix); } pixaDestroy(&pixa); } /* Truncate the arrays to those with non-empty containers */ pixaaTruncate(recog->pixaa_u); pixaaTruncate(recog->pixaa); ptaaTruncate(recog->ptaa_u); ptaaTruncate(recog->ptaa); numaaTruncate(recog->naasum_u); numaaTruncate(recog->naasum); recog->train_done = TRUE; return 0; } /*! * \brief recogTemplatesAreOK() * * \param[in] recog * \param[in] minsize set to -1 for default * \param[in] minfract set to -1.0 for default * \param[out] pok set to 1 if template set is valid; 0 otherwise * \return 1 on error; 0 otherwise. An invalid template set is not an error. * *
* Notes: * (1) This is called by recogTrainingFinished(). A return value of 0 * will cause recogTrainingFinished() to destroy the recog. * (2) %minsize is the minimum number of samples required for * the class; -1 uses the default * (3) %minfract is the minimum fraction of classes required for * the recog to be usable; -1.0 uses the default **/ static l_int32 recogTemplatesAreOK(L_RECOG *recog, l_int32 minsize, l_float32 minfract, l_int32 *pok) { l_int32 i, n, validsets, nt; l_float32 ratio; NUMA *na; PROCNAME("recogTemplatesAreOK"); if (!pok) return ERROR_INT("&ok not defined", procName, 1); *pok = 0; if (!recog) return ERROR_INT("recog not defined", procName, 1); minsize = (minsize < 0) ? DefaultMinSetSize : minsize; minfract = (minfract < 0) ? DefaultMinSetFract : minfract; n = pixaaGetCount(recog->pixaa_u, &na); validsets = 0; for (i = 0, validsets = 0; i < n; i++) { numaGetIValue(na, i, &nt); if (nt >= minsize) validsets++; } numaDestroy(&na); ratio = (l_float32)validsets / (l_float32)recog->charset_size; *pok = (ratio >= minfract) ? 1 : 0; return 0; } /*! * \brief recogFilterPixaBySize() * * \param[in] pixas labeled templates * \param[in] setsize size of character set (number of classes) * \param[in] maxkeep max number of templates to keep in a class * \param[in] max_ht_ratio max allowed height ratio (see below) * \param[out] pna [optional] debug output, giving the number * in each class after filtering; use NULL to skip * \return pixa filtered templates, or NULL on error * *
* Notes: * (1) The basic assumption is that the most common and larger * templates in each class are more likely to represent the * characters we are interested in. For example, larger digits * are more likely to represent page numbers, and smaller digits * could be data in tables. Therefore, we bias the first * stage of filtering toward the larger characters by removing * very small ones, and select based on proximity of the * remaining characters to median height. * (2) For each of the %setsize classes, order the templates * increasingly by height. Take the rank 0.9 height. Eliminate * all templates that are shorter by more than %max_ht_ratio. * Of the remaining ones, select up to %maxkeep that are closest * in rank order height to the median template. **/ PIXA * recogFilterPixaBySize(PIXA *pixas, l_int32 setsize, l_int32 maxkeep, l_float32 max_ht_ratio, NUMA **pna) { l_int32 i, j, h90, hj, j1, j2, j90, n, nc; l_float32 ratio; NUMA *na; PIXA *pixa1, *pixa2, *pixa3, *pixa4, *pixa5; PIXAA *paa; PROCNAME("recogFilterPixaBySize"); if (pna) *pna = NULL; if (!pixas) return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); if ((paa = recogSortPixaByClass(pixas, setsize)) == NULL) return (PIXA *)ERROR_PTR("paa not made", procName, NULL); nc = pixaaGetCount(paa, NULL); na = (pna) ? numaCreate(0) : NULL; if (pna) *pna = na; pixa5 = pixaCreate(0); for (i = 0; i < nc; i++) { pixa1 = pixaaGetPixa(paa, i, L_CLONE); if ((n = pixaGetCount(pixa1)) == 0) { pixaDestroy(&pixa1); continue; } pixa2 = pixaSort(pixa1, L_SORT_BY_HEIGHT, L_SORT_INCREASING, NULL, L_COPY); j90 = (l_int32)(0.9 * n); pixaGetPixDimensions(pixa2, j90, NULL, &h90, NULL); pixa3 = pixaCreate(n); for (j = 0; j < n; j++) { pixaGetPixDimensions(pixa2, j, NULL, &hj, NULL); ratio = (l_float32)h90 / (l_float32)hj; if (ratio <= max_ht_ratio) pixaAddPix(pixa3, pixaGetPix(pixa2, j, L_COPY), L_INSERT); } n = pixaGetCount(pixa3); if (n <= maxkeep) { pixa4 = pixaCopy(pixa3, L_CLONE); } else { j1 = (n - maxkeep) / 2; j2 = j1 + maxkeep - 1; pixa4 = pixaSelectRange(pixa3, j1, j2, L_CLONE); } if (na) numaAddNumber(na, pixaGetCount(pixa4)); pixaJoin(pixa5, pixa4, 0, -1); pixaDestroy(&pixa1); pixaDestroy(&pixa2); pixaDestroy(&pixa3); pixaDestroy(&pixa4); } pixaaDestroy(&paa); return pixa5; } /*! * \brief recogSortPixaByClass() * * \param[in] pixa labeled templates * \param[in] setsize size of character set (number of classes) * \return paa pixaa where each pixa has templates for one class, * or null on error */ PIXAA * recogSortPixaByClass(PIXA *pixa, l_int32 setsize) { PIXAA *paa; L_RECOG *recog; PROCNAME("recogSortPixaByClass"); if (!pixa) return (PIXAA *)ERROR_PTR("pixa not defined", procName, NULL); if ((recog = recogCreateFromPixaNoFinish(pixa, 0, 0, 0, 0, 0)) == NULL) return (PIXAA *)ERROR_PTR("recog not made", procName, NULL); paa = recog->pixaa_u; /* grab the paa of unscaled templates */ recog->pixaa_u = NULL; recogDestroy(&recog); return paa; } /*! * \brief recogRemoveOutliers1() * * \param[in] precog addr of recog with unscaled labeled templates * \param[in] minscore keep everything with at least this score * \param[in] mintarget minimum desired number to retain if possible * \param[in] minsize minimum number of samples required for a class * \param[out] ppixsave [optional debug] saved templates, with scores * \param[out] ppixrem [optional debug] removed templates, with scores * \return 0 if OK, 1 on error. * *
* Notes: * (1) This is a convenience wrapper when using default parameters * for the recog. See pixaRemoveOutliers1() for details. * (2) If this succeeds, the new recog replaces the input recog; * if it fails, the input recog is destroyed. **/ l_ok recogRemoveOutliers1(L_RECOG **precog, l_float32 minscore, l_int32 mintarget, l_int32 minsize, PIX **ppixsave, PIX **ppixrem) { PIXA *pixa1, *pixa2; L_RECOG *recog; PROCNAME("recogRemoveOutliers1"); if (!precog) return ERROR_INT("&recog not defined", procName, 1); if (*precog == NULL) return ERROR_INT("recog not defined", procName, 1); /* Extract the unscaled templates */ pixa1 = recogExtractPixa(*precog); recogDestroy(precog); pixa2 = pixaRemoveOutliers1(pixa1, minscore, mintarget, minsize, ppixsave, ppixrem); pixaDestroy(&pixa1); if (!pixa2) return ERROR_INT("failure to remove outliers", procName, 1); recog = recogCreateFromPixa(pixa2, 0, 0, 0, 150, 1); pixaDestroy(&pixa2); if (!recog) return ERROR_INT("failure to make recog from pixa sans outliers", procName, 1); *precog = recog; return 0; } /*! * \brief pixaRemoveOutliers1() * * \param[in] pixas unscaled labeled templates * \param[in] minscore keep everything with at least this score; * use -1.0 for default. * \param[in] mintarget minimum desired number to retain if possible; * use -1 for default. * \param[in] minsize minimum number of samples required for a class; * use -1 for default. * \param[out] ppixsave [optional debug] saved templates, with scores * \param[out] ppixrem [optional debug] removed templates, with scores * \return pixa of unscaled templates to be kept, or NULL on error * *
* Notes: * (1) Removing outliers is particularly important when recognition * goes against all the samples in the training set, as opposed * to the averages for each class. The reason is that we get * an identification error if a mislabeled template is a best * match for an input sample. * (2) Because the score values depend strongly on the quality * of the character images, to avoid losing too many samples * we supplement a minimum score for retention with a score * necessary to acquire the minimum target number of templates. * To do this we are willing to use a lower threshold, * LowerScoreThreshold, on the score. Consequently, with * poor quality templates, we may keep samples with a score * less than %minscore, but never less than LowerScoreThreshold. * And if the number of samples is less than %minsize, we do * not use any. * (3) This is meant to be used on a BAR, where the templates all * come from the same book; use minscore ~0.75. * (4) Method: make a scaled recog from the input %pixas. Then, * for each class: generate the averages, match each * scaled template against the average, and save unscaled * templates that had a sufficiently good match. **/ PIXA * pixaRemoveOutliers1(PIXA *pixas, l_float32 minscore, l_int32 mintarget, l_int32 minsize, PIX **ppixsave, PIX **ppixrem) { l_int32 i, j, debug, n, area1, area2; l_float32 x1, y1, x2, y2, minfract, score, rankscore, threshscore; NUMA *nasum, *narem, *nasave, *nascore; PIX *pix1, *pix2; PIXA *pixa, *pixarem, *pixad; PTA *pta; L_RECOG *recog; PROCNAME("pixaRemoveOutliers1"); if (ppixsave) *ppixsave = NULL; if (ppixrem) *ppixrem = NULL; if (!pixas) return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); minscore = L_MIN(minscore, 1.0); if (minscore <= 0.0) minscore = DefaultMinScore; mintarget = L_MIN(mintarget, 3); if (mintarget <= 0) mintarget = DefaultMinTarget; if (minsize < 0) minsize = DefaultMinSetSize; /* Make a special height-scaled recognizer with average templates */ debug = (ppixsave || ppixrem) ? 1 : 0; recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1); if (!recog) return (PIXA *)ERROR_PTR("bad pixas; recog not made", procName, NULL); recogAverageSamples(&recog, debug); if (!recog) return (PIXA *)ERROR_PTR("bad templates", procName, NULL); nasave = (ppixsave) ? numaCreate(0) : NULL; pixarem = (ppixrem) ? pixaCreate(0) : NULL; narem = (ppixrem) ? numaCreate(0) : NULL; pixad = pixaCreate(0); for (i = 0; i < recog->setsize; i++) { /* Access the average template and values for scaled * images in this class */ pix1 = pixaGetPix(recog->pixa, i, L_CLONE); ptaGetPt(recog->pta, i, &x1, &y1); numaGetIValue(recog->nasum, i, &area1); /* Get the scores for each sample in the class */ pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE); pta = ptaaGetPta(recog->ptaa, i, L_CLONE); /* centroids */ nasum = numaaGetNuma(recog->naasum, i, L_CLONE); /* fg areas */ n = pixaGetCount(pixa); nascore = numaCreate(n); for (j = 0; j < n; j++) { pix2 = pixaGetPix(pixa, j, L_CLONE); ptaGetPt(pta, j, &x2, &y2); /* centroid average */ numaGetIValue(nasum, j, &area2); /* fg sum average */ pixCorrelationScoreSimple(pix1, pix2, area1, area2, x1 - x2, y1 - y2, 5, 5, recog->sumtab, &score); numaAddNumber(nascore, score); if (debug && score == 0.0) /* typ. large size difference */ fprintf(stderr, "Got 0 score for i = %d, j = %d\n", i, j); pixDestroy(&pix2); } pixDestroy(&pix1); /* Find the rankscore, corresponding to the 1.0 - minfract. * To attempt to maintain the minfract of templates, use as a * cutoff the minimum of minscore and the rank score. However, * no template is saved with an actual score less than * that at least one template is kept. */ minfract = (l_float32)mintarget / (l_float32)n; numaGetRankValue(nascore, 1.0 - minfract, NULL, 0, &rankscore); threshscore = L_MAX(LowerScoreThreshold, L_MIN(minscore, rankscore)); if (debug) { L_INFO("minscore = %4.2f, rankscore = %4.2f, threshscore = %4.2f\n", procName, minscore, rankscore, threshscore); } /* Save templates that are at or above threshold. * Toss any classes with less than %minsize templates. */ for (j = 0; j < n; j++) { numaGetFValue(nascore, j, &score); pix1 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY); if (score >= threshscore && n >= minsize) { pixaAddPix(pixad, pix1, L_INSERT); if (nasave) numaAddNumber(nasave, score); } else if (debug) { pixaAddPix(pixarem, pix1, L_INSERT); numaAddNumber(narem, score); } else { pixDestroy(&pix1); } } pixaDestroy(&pixa); ptaDestroy(&pta); numaDestroy(&nasum); numaDestroy(&nascore); } if (ppixsave) { *ppixsave = pixDisplayOutliers(pixad, nasave); numaDestroy(&nasave); } if (ppixrem) { *ppixrem = pixDisplayOutliers(pixarem, narem); pixaDestroy(&pixarem); numaDestroy(&narem); } recogDestroy(&recog); return pixad; } /*! * \brief recogRemoveOutliers2() * * \param[in] precog addr of recog with unscaled labeled templates * \param[in] minscore keep everything with at least this score * \param[in] minsize minimum number of samples required for a class * \param[out] ppixsave [optional debug] saved templates, with scores * \param[out] ppixrem [optional debug] removed templates, with scores * \return 0 if OK, 1 on error. * *
* Notes: * (1) This is a convenience wrapper when using default parameters * for the recog. See pixaRemoveOutliers2() for details. * (2) If this succeeds, the new recog replaces the input recog; * if it fails, the input recog is destroyed. **/ l_ok recogRemoveOutliers2(L_RECOG **precog, l_float32 minscore, l_int32 minsize, PIX **ppixsave, PIX **ppixrem) { PIXA *pixa1, *pixa2; L_RECOG *recog; PROCNAME("recogRemoveOutliers2"); if (!precog) return ERROR_INT("&recog not defined", procName, 1); if (*precog == NULL) return ERROR_INT("recog not defined", procName, 1); /* Extract the unscaled templates */ pixa1 = recogExtractPixa(*precog); recogDestroy(precog); pixa2 = pixaRemoveOutliers2(pixa1, minscore, minsize, ppixsave, ppixrem); pixaDestroy(&pixa1); if (!pixa2) return ERROR_INT("failure to remove outliers", procName, 1); recog = recogCreateFromPixa(pixa2, 0, 0, 0, 150, 1); pixaDestroy(&pixa2); if (!recog) return ERROR_INT("failure to make recog from pixa sans outliers", procName, 1); *precog = recog; return 0; } /*! * \brief pixaRemoveOutliers2() * * \param[in] pixas unscaled labeled templates * \param[in] minscore keep everything with at least this score; * use -1.0 for default. * \param[in] minsize minimum number of samples required for a class; * use -1 for default. * \param[out] ppixsave [optional debug] saved templates, with scores * \param[out] ppixrem [optional debug] removed templates, with scores * \return pixa of unscaled templates to be kept, or NULL on error * *
* Notes: * (1) Removing outliers is particularly important when recognition * goes against all the samples in the training set, as opposed * to the averages for each class. The reason is that we get * an identification error if a mislabeled template is a best * match for an input sample. * (2) This method compares each template against the average templates * of each class, and discards any template that has a higher * correlation to a class different from its own. It also * sets a lower bound on correlation scores with its class average. * (3) This is meant to be used on a BAR, where the templates all * come from the same book; use minscore ~0.75. **/ PIXA * pixaRemoveOutliers2(PIXA *pixas, l_float32 minscore, l_int32 minsize, PIX **ppixsave, PIX **ppixrem) { l_int32 i, j, k, n, area1, area2, maxk, debug; l_float32 x1, y1, x2, y2, score, maxscore; NUMA *nan, *nascore, *nasave; PIX *pix1, *pix2, *pix3; PIXA *pixarem, *pixad; L_RECOG *recog; PROCNAME("pixaRemoveOutliers2"); if (ppixsave) *ppixsave = NULL; if (ppixrem) *ppixrem = NULL; if (!pixas) return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); minscore = L_MIN(minscore, 1.0); if (minscore <= 0.0) minscore = DefaultMinScore; if (minsize < 0) minsize = DefaultMinSetSize; /* Make a special height-scaled recognizer with average templates */ debug = (ppixsave || ppixrem) ? 1 : 0; recog = recogCreateFromPixa(pixas, 0, 40, 0, 128, 1); if (!recog) return (PIXA *)ERROR_PTR("bad pixas; recog not made", procName, NULL); recogAverageSamples(&recog, debug); if (!recog) return (PIXA *)ERROR_PTR("bad templates", procName, NULL); nasave = (ppixsave) ? numaCreate(0) : NULL; pixarem = (ppixrem) ? pixaCreate(0) : NULL; pixad = pixaCreate(0); pixaaGetCount(recog->pixaa, &nan); /* number of templates in each class */ for (i = 0; i < recog->setsize; i++) { /* Get the scores for each sample in the class, when comparing * with averages from all the classes. */ numaGetIValue(nan, i, &n); for (j = 0; j < n; j++) { pix1 = pixaaGetPix(recog->pixaa, i, j, L_CLONE); ptaaGetPt(recog->ptaa, i, j, &x1, &y1); /* centroid */ numaaGetValue(recog->naasum, i, j, NULL, &area1); /* fg sum */ nascore = numaCreate(n); for (k = 0; k < recog->setsize; k++) { /* average templates */ pix2 = pixaGetPix(recog->pixa, k, L_CLONE); ptaGetPt(recog->pta, k, &x2, &y2); /* average centroid */ numaGetIValue(recog->nasum, k, &area2); /* average fg sum */ pixCorrelationScoreSimple(pix1, pix2, area1, area2, x1 - x2, y1 - y2, 5, 5, recog->sumtab, &score); numaAddNumber(nascore, score); pixDestroy(&pix2); } /* Save templates that are in the correct class and * at or above threshold. Toss any classes with less * than %minsize templates. */ numaGetMax(nascore, &maxscore, &maxk); if (maxk == i && maxscore >= minscore && n >= minsize) { /* save it */ pix3 = pixaaGetPix(recog->pixaa_u, i, j, L_COPY); pixaAddPix(pixad, pix3, L_INSERT); if (nasave) numaAddNumber(nasave, maxscore); } else if (ppixrem) { /* outlier */ pix3 = recogDisplayOutlier(recog, i, j, maxk, maxscore); pixaAddPix(pixarem, pix3, L_INSERT); } numaDestroy(&nascore); pixDestroy(&pix1); } } if (ppixsave) { *ppixsave = pixDisplayOutliers(pixad, nasave); numaDestroy(&nasave); } if (ppixrem) { *ppixrem = pixaDisplayTiledInRows(pixarem, 32, 1500, 1.0, 0, 20, 2); pixaDestroy(&pixarem); } numaDestroy(&nan); recogDestroy(&recog); return pixad; } /*------------------------------------------------------------------------* * Training on unlabeled data * *------------------------------------------------------------------------*/ /*! * \brief recogTrainFromBoot() * * \param[in] recogboot labeled boot recognizer * \param[in] pixas set of unlabeled input characters * \param[in] minscore min score for accepting the example; e.g., 0.75 * \param[in] threshold for binarization, if needed * \param[in] debug 1 for debug output saved to recogboot; 0 otherwise * \return pixad labeled version of input pixas, trained on a BSR, * or NULL on error * *
* Notes: * (1) This takes %pixas of unscaled single characters and %recboot, * a bootstrep recognizer (BSR) that has been set up with parameters * * scaleh: scale all templates to this height * * linew: width of normalized strokes, or 0 if using * the input image * It modifies the pix in %pixas accordingly and correlates * with the templates in the BSR. It returns those input * images in %pixas whose best correlation with the BSR is at * or above %minscore. The returned pix have added text labels * for the text string of the class to which the best * correlated template belongs. * (2) Identification occurs in scaled mode (typically with h = 40), * optionally using a width-normalized line images derived * from those in %pixas. **/ PIXA * recogTrainFromBoot(L_RECOG *recogboot, PIXA *pixas, l_float32 minscore, l_int32 threshold, l_int32 debug) { char *text; l_int32 i, n, same, maxd, scaleh, linew; l_float32 score; PIX *pix1, *pix2, *pixdb; PIXA *pixa1, *pixa2, *pixa3, *pixad; PROCNAME("recogTrainFromBoot"); if (!recogboot) return (PIXA *)ERROR_PTR("recogboot not defined", procName, NULL); if (!pixas) return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); /* Make sure all input pix are 1 bpp */ if ((n = pixaGetCount(pixas)) == 0) return (PIXA *)ERROR_PTR("no pix in pixa", procName, NULL); pixaVerifyDepth(pixas, &same, &maxd); if (maxd == 1) { pixa1 = pixaCopy(pixas, L_COPY); } else { pixa1 = pixaCreate(n); for (i = 0; i < n; i++) { pix1 = pixaGetPix(pixas, i, L_CLONE); pix2 = pixConvertTo1(pix1, threshold); pixaAddPix(pixa1, pix2, L_INSERT); pixDestroy(&pix1); } } /* Scale the input images to match the BSR */ scaleh = recogboot->scaleh; linew = recogboot->linew; pixa2 = pixaCreate(n); for (i = 0; i < n; i++) { pix1 = pixaGetPix(pixa1, i, L_CLONE); pix2 = pixScaleToSize(pix1, 0, scaleh); pixaAddPix(pixa2, pix2, L_INSERT); pixDestroy(&pix1); } pixaDestroy(&pixa1); /* Optionally convert to width-normalized line */ if (linew > 0) pixa3 = pixaSetStrokeWidth(pixa2, linew, 4, 8); else pixa3 = pixaCopy(pixa2, L_CLONE); pixaDestroy(&pixa2); /* Identify using recogboot */ n = pixaGetCount(pixa3); pixad = pixaCreate(n); for (i = 0; i < n; i++) { pix1 = pixaGetPix(pixa3, i, L_COPY); pixSetText(pix1, NULL); /* remove any existing text or labelling */ if (!debug) { recogIdentifyPix(recogboot, pix1, NULL); } else { recogIdentifyPix(recogboot, pix1, &pixdb); pixaAddPix(recogboot->pixadb_boot, pixdb, L_INSERT); } rchExtract(recogboot->rch, NULL, &score, &text, NULL, NULL, NULL, NULL); if (score >= minscore) { pix2 = pixaGetPix(pixas, i, L_COPY); pixSetText(pix2, text); pixaAddPix(pixad, pix2, L_INSERT); pixaAddPix(recogboot->pixadb_boot, pixdb, L_COPY); } LEPT_FREE(text); pixDestroy(&pix1); } pixaDestroy(&pixa3); return pixad; } /*------------------------------------------------------------------------* * Padding the digit training set * *------------------------------------------------------------------------*/ /*! * \brief recogPadDigitTrainingSet() * * \param[in,out] precog trained; if padding is needed, it is replaced * by a a new padded recog * \param[in] scaleh must be > 0; suggest ~40. * \param[in] linew use 0 for original scanned images * \return 0 if OK, 1 on error * *
* Notes: * (1) This is a no-op if padding is not needed. However, * if it is, this replaces the input recog with a new recog, * padded appropriately with templates from a boot recognizer, * and set up with correlation templates derived from * %scaleh and %linew. **/ l_ok recogPadDigitTrainingSet(L_RECOG **precog, l_int32 scaleh, l_int32 linew) { PIXA *pixa; L_RECOG *recog1, *recog2; SARRAY *sa; PROCNAME("recogPadDigitTrainingSet"); if (!precog) return ERROR_INT("&recog not defined", procName, 1); recog1 = *precog; recogIsPaddingNeeded(recog1, &sa); if (!sa) return 0; /* Get a new pixa with the padding templates added */ pixa = recogAddDigitPadTemplates(recog1, sa); sarrayDestroy(&sa); if (!pixa) return ERROR_INT("pixa not made", procName, 1); /* Need to use templates that are scaled to a fixed height. */ if (scaleh <= 0) { L_WARNING("templates must be scaled to fixed height; using %d\n", procName, 40); scaleh = 40; } /* Create a hybrid recog, composed of templates from both * the original and bootstrap sources. */ recog2 = recogCreateFromPixa(pixa, 0, scaleh, linew, recog1->threshold, recog1->maxyshift); pixaDestroy(&pixa); recogDestroy(precog); *precog = recog2; return 0; } /*! * \brief recogIsPaddingNeeded() * * \param[in] recog trained * \param[out] psa addr of returned string containing text value * \return 1 on error; 0 if OK, whether or not additional padding * templates are required. * *
* Notes: * (1) This returns a string array in &sa containing character values * for which extra templates are needed; this sarray is * used by recogGetPadTemplates(). It returns NULL * if no padding templates are needed. **/ l_int32 recogIsPaddingNeeded(L_RECOG *recog, SARRAY **psa) { char *str; l_int32 i, nt, min_nopad, nclass, allclasses; l_float32 minval; NUMA *naclass; SARRAY *sa; PROCNAME("recogIsPaddingNeeded"); if (!psa) return ERROR_INT("&sa not defined", procName, 1); *psa = NULL; if (!recog) return ERROR_INT("recog not defined", procName, 1); /* Do we have samples from all classes? */ nclass = pixaaGetCount(recog->pixaa_u, &naclass); /* unscaled bitmaps */ allclasses = (nclass == recog->charset_size) ? 1 : 0; /* Are there enough samples in each class already? */ min_nopad = recog->min_nopad; numaGetMin(naclass, &minval, NULL); if (allclasses && (minval >= min_nopad)) { numaDestroy(&naclass); return 0; } /* Are any classes not represented? */ sa = recogAddMissingClassStrings(recog); *psa = sa; /* Are any other classes under-represented? */ for (i = 0; i < nclass; i++) { numaGetIValue(naclass, i, &nt); if (nt < min_nopad) { str = sarrayGetString(recog->sa_text, i, L_COPY); sarrayAddString(sa, str, L_INSERT); } } numaDestroy(&naclass); return 0; } /*! * \brief recogAddMissingClassStrings() * * \param[in] recog trained * \return sa of class string missing in %recog, or NULL on error * *
* Notes: * (1) This returns an empty %sa if there is at least one template * in each class in %recog. **/ static SARRAY * recogAddMissingClassStrings(L_RECOG *recog) { char *text; char str[4]; l_int32 i, nclass, index, ival; NUMA *na; SARRAY *sa; PROCNAME("recogAddMissingClassStrings"); if (!recog) return (SARRAY *)ERROR_PTR("recog not defined", procName, NULL); /* Only handling digits */ nclass = pixaaGetCount(recog->pixaa_u, NULL); /* unscaled bitmaps */ if (recog->charset_type != 1 || nclass == 10) return sarrayCreate(0); /* empty */ /* Make an indicator array for missing classes */ na = numaCreate(0); sa = sarrayCreate(0); for (i = 0; i < recog->charset_size; i++) numaAddNumber(na, 1); for (i = 0; i < nclass; i++) { text = sarrayGetString(recog->sa_text, i, L_NOCOPY); index = text[0] - '0'; numaSetValue(na, index, 0); } /* Convert to string and add to output */ for (i = 0; i < nclass; i++) { numaGetIValue(na, i, &ival); if (ival == 1) { str[0] = '0' + i; str[1] = '\0'; sarrayAddString(sa, str, L_COPY); } } numaDestroy(&na); return sa; } /*! * \brief recogAddDigitPadTemplates() * * \param[in] recog trained * \param[in] sa set of text strings that need to be padded * \return pixa of all templates from %recog and the additional pad * templates from a boot recognizer; or NULL on error * *
* Notes: * (1) Call recogIsPaddingNeeded() first, which returns %sa of * template text strings for classes where more templates * are needed. **/ PIXA * recogAddDigitPadTemplates(L_RECOG *recog, SARRAY *sa) { char *str, *text; l_int32 i, j, n, nt; PIX *pix; PIXA *pixa1, *pixa2; PROCNAME("recogAddDigitPadTemplates"); if (!recog) return (PIXA *)ERROR_PTR("recog not defined", procName, NULL); if (!sa) return (PIXA *)ERROR_PTR("sa not defined", procName, NULL); if (recogCharsetAvailable(recog->charset_type) == FALSE) return (PIXA *)ERROR_PTR("boot charset not available", procName, NULL); /* Make boot recog templates */ pixa1 = recogMakeBootDigitTemplates(0, 0); n = pixaGetCount(pixa1); /* Extract the unscaled templates from %recog */ pixa2 = recogExtractPixa(recog); /* Add selected boot recog templates based on the text strings in sa */ nt = sarrayGetCount(sa); for (i = 0; i < n; i++) { pix = pixaGetPix(pixa1, i, L_CLONE); text = pixGetText(pix); for (j = 0; j < nt; j++) { str = sarrayGetString(sa, j, L_NOCOPY); if (!strcmp(text, str)) { pixaAddPix(pixa2, pix, L_COPY); break; } } pixDestroy(&pix); } pixaDestroy(&pixa1); return pixa2; } /*! * \brief recogCharsetAvailable() * * \param[in] type of charset for padding * \return 1 if available; 0 if not. */ static l_int32 recogCharsetAvailable(l_int32 type) { l_int32 ret; PROCNAME("recogCharsetAvailable"); switch (type) { case L_ARABIC_NUMERALS: ret = TRUE; break; case L_LC_ROMAN_NUMERALS: case L_UC_ROMAN_NUMERALS: case L_LC_ALPHA: case L_UC_ALPHA: L_INFO("charset type %d not available\n", procName, type); ret = FALSE; break; default: L_INFO("charset type %d is unknown\n", procName, type); ret = FALSE; break; } return ret; } /*------------------------------------------------------------------------* * Making a boot digit recognizer * *------------------------------------------------------------------------*/ /*! * \brief recogMakeBootDigitRecog() * * \param[in] nsamp number of samples of each digit; or 0 * \param[in] scaleh scale all heights to this; typ. use 40 * \param[in] linew normalized line width; typ. use 5; 0 to skip * \param[in] maxyshift from nominal centroid alignment; typically 0 or 1 * \param[in] debug 1 for showing templates; 0 otherwise * \return recog, or NULL on error * *
* Notes: * (1) This takes a set of pre-computed, labeled pixa of single * digits, and generates a recognizer from them. * The templates used in the recognizer can be modified by: * - scaling (isotropically to fixed height) * - generating a skeleton and thickening so that all strokes * have the same width. * (2) The resulting templates are scaled versions of either the * input bitmaps or images with fixed line widths. To use the * input bitmaps, set %linew = 0; otherwise, set %linew to the * desired line width. * (3) If %nsamp == 0, this uses and extends the output from * three boot generators: * l_bootnum_gen1, l_bootnum_gen2, l_bootnum_gen3. * Otherwise, it uses exactly %nsamp templates of each digit, * extracted by l_bootnum_gen4. **/ L_RECOG * recogMakeBootDigitRecog(l_int32 nsamp, l_int32 scaleh, l_int32 linew, l_int32 maxyshift, l_int32 debug) { PIXA *pixa; L_RECOG *recog; /* Get the templates, extended by horizontal scaling */ pixa = recogMakeBootDigitTemplates(nsamp, debug); /* Make the boot recog; recogModifyTemplate() will scale the * templates and optionally turn them into strokes of fixed width. */ recog = recogCreateFromPixa(pixa, 0, scaleh, linew, 128, maxyshift); pixaDestroy(&pixa); if (debug) recogShowContent(stderr, recog, 0, 1); return recog; } /*! * \brief recogMakeBootDigitTemplates() * * \param[in] nsamp number of samples of each digit; or 0 * \param[in] debug 1 for display of templates * \return pixa of templates; or NULL on error * *
* Notes: * (1) See recogMakeBootDigitRecog(). **/ PIXA * recogMakeBootDigitTemplates(l_int32 nsamp, l_int32 debug) { NUMA *na1; PIX *pix1, *pix2, *pix3; PIXA *pixa1, *pixa2, *pixa3; if (nsamp > 0) { pixa1 = l_bootnum_gen4(nsamp); if (debug) { pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 10, 2, 6, 0xff000000); pixDisplay(pix1, 0, 0); pixDestroy(&pix1); } return pixa1; } /* Else, generate from 3 pixa */ pixa1 = l_bootnum_gen1(); pixa2 = l_bootnum_gen2(); pixa3 = l_bootnum_gen3(); if (debug) { pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 10, 2, 6, 0xff000000); pix2 = pixaDisplayTiledWithText(pixa2, 1500, 1.0, 10, 2, 6, 0xff000000); pix3 = pixaDisplayTiledWithText(pixa3, 1500, 1.0, 10, 2, 6, 0xff000000); pixDisplay(pix1, 0, 0); pixDisplay(pix2, 600, 0); pixDisplay(pix3, 1200, 0); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); } pixaJoin(pixa1, pixa2, 0, -1); pixaJoin(pixa1, pixa3, 0, -1); pixaDestroy(&pixa2); pixaDestroy(&pixa3); /* Extend by horizontal scaling */ na1 = numaCreate(4); numaAddNumber(na1, 0.9); numaAddNumber(na1, 1.1); numaAddNumber(na1, 1.2); pixa2 = pixaExtendByScaling(pixa1, na1, L_HORIZ, 1); pixaDestroy(&pixa1); numaDestroy(&na1); return pixa2; } /*------------------------------------------------------------------------* * Debugging * *------------------------------------------------------------------------*/ /*! * \brief recogShowContent() * * \param[in] fp file stream * \param[in] recog * \param[in] index for naming of output files of template images * \param[in] display 1 for showing template images; 0 otherwise * \return 0 if OK, 1 on error */ l_ok recogShowContent(FILE *fp, L_RECOG *recog, l_int32 index, l_int32 display) { char buf[128]; l_int32 i, val, count; PIX *pix; NUMA *na; PROCNAME("recogShowContent"); if (!fp) return ERROR_INT("stream not defined", procName, 1); if (!recog) return ERROR_INT("recog not defined", procName, 1); fprintf(fp, "Debug print of recog contents\n"); fprintf(fp, " Setsize: %d\n", recog->setsize); fprintf(fp, " Binarization threshold: %d\n", recog->threshold); fprintf(fp, " Maximum matching y-jiggle: %d\n", recog->maxyshift); if (recog->linew <= 0) fprintf(fp, " Using image templates for matching\n"); else fprintf(fp, " Using templates with fixed line width for matching\n"); if (recog->scalew == 0) fprintf(fp, " No width scaling of templates\n"); else fprintf(fp, " Template width scaled to %d\n", recog->scalew); if (recog->scaleh == 0) fprintf(fp, " No height scaling of templates\n"); else fprintf(fp, " Template height scaled to %d\n", recog->scaleh); fprintf(fp, " Number of samples in each class:\n"); pixaaGetCount(recog->pixaa_u, &na); for (i = 0; i < recog->setsize; i++) { l_dnaGetIValue(recog->dna_tochar, i, &val); numaGetIValue(na, i, &count); if (val < 128) fprintf(fp, " class %d, char %c: %d\n", i, val, count); else fprintf(fp, " class %d, val %d: %d\n", i, val, count); } numaDestroy(&na); if (display) { lept_mkdir("lept/recog"); pix = pixaaDisplayByPixa(recog->pixaa_u, 20, 20, 1000); snprintf(buf, sizeof(buf), "/tmp/lept/recog/templates_u.%d.png", index); pixWriteDebug(buf, pix, IFF_PNG); pixDisplay(pix, 0, 200 * index); pixDestroy(&pix); if (recog->train_done) { pix = pixaaDisplayByPixa(recog->pixaa, 20, 20, 1000); snprintf(buf, sizeof(buf), "/tmp/lept/recog/templates.%d.png", index); pixWriteDebug(buf, pix, IFF_PNG); pixDisplay(pix, 800, 200 * index); pixDestroy(&pix); } } return 0; } /*! * \brief recogDebugAverages() * * \param[in] precog addr of recog * \param[in] debug 0 no output; 1 for images; 2 for text; 3 for both * \return 0 if OK, 1 on error * *
* Notes: * (1) Generates an image that pairs each of the input images used * in training with the average template that it is best * correlated to. This is written into the recog. * (2) It also generates pixa_tr of all the input training images, * which can be used, e.g., in recogShowMatchesInRange(). * (3) Destroys the recog if the averaging function finds any bad classes. **/ l_ok recogDebugAverages(L_RECOG **precog, l_int32 debug) { l_int32 i, j, n, np, index; l_float32 score; PIX *pix1, *pix2, *pix3; PIXA *pixa, *pixat; PIXAA *paa1, *paa2; L_RECOG *recog; PROCNAME("recogDebugAverages"); if (!precog) return ERROR_INT("&recog not defined", procName, 1); if ((recog = *precog) == NULL) return ERROR_INT("recog not defined", procName, 1); /* Mark the training as finished if necessary, and make sure * that the average templates have been built. */ recogAverageSamples(&recog, 0); if (!recog) return ERROR_INT("averaging failed; recog destroyed", procName, 1); /* Save a pixa of all the training examples */ paa1 = recog->pixaa; if (!recog->pixa_tr) recog->pixa_tr = pixaaFlattenToPixa(paa1, NULL, L_CLONE); /* Destroy any existing image and make a new one */ if (recog->pixdb_ave) pixDestroy(&recog->pixdb_ave); n = pixaaGetCount(paa1, NULL); paa2 = pixaaCreate(n); for (i = 0; i < n; i++) { pixa = pixaCreate(0); pixat = pixaaGetPixa(paa1, i, L_CLONE); np = pixaGetCount(pixat); for (j = 0; j < np; j++) { pix1 = pixaaGetPix(paa1, i, j, L_CLONE); recogIdentifyPix(recog, pix1, &pix2); rchExtract(recog->rch, &index, &score, NULL, NULL, NULL, NULL, NULL); if (debug >= 2) fprintf(stderr, "index = %d, score = %7.3f\n", index, score); pix3 = pixAddBorder(pix2, 2, 1); pixaAddPix(pixa, pix3, L_INSERT); pixDestroy(&pix1); pixDestroy(&pix2); } pixaaAddPixa(paa2, pixa, L_INSERT); pixaDestroy(&pixat); } recog->pixdb_ave = pixaaDisplayByPixa(paa2, 20, 20, 2500); if (debug % 2) { lept_mkdir("lept/recog"); pixWriteDebug("/tmp/lept/recog/templ_match.png", recog->pixdb_ave, IFF_PNG); pixDisplay(recog->pixdb_ave, 100, 100); } pixaaDestroy(&paa2); return 0; } /*! * \brief recogShowAverageTemplates() * * \param[in] recog * \return 0 on success, 1 on failure * *
* Notes: * (1) This debug routine generates a display of the averaged templates, * both scaled and unscaled, with the centroid visible in red. **/ l_int32 recogShowAverageTemplates(L_RECOG *recog) { l_int32 i, size; l_float32 x, y; PIX *pix1, *pix2, *pixr; PIXA *pixat, *pixadb; PROCNAME("recogShowAverageTemplates"); if (!recog) return ERROR_INT("recog not defined", procName, 1); fprintf(stderr, "min/max width_u = (%d,%d); min/max height_u = (%d,%d)\n", recog->minwidth_u, recog->maxwidth_u, recog->minheight_u, recog->maxheight_u); fprintf(stderr, "min splitw = %d, max splith = %d\n", recog->min_splitw, recog->max_splith); pixaDestroy(&recog->pixadb_ave); pixr = pixCreate(3, 3, 32); /* 3x3 red square for centroid location */ pixSetAllArbitrary(pixr, 0xff000000); pixadb = pixaCreate(2); /* Unscaled bitmaps */ size = recog->setsize; pixat = pixaCreate(size); for (i = 0; i < size; i++) { if ((pix1 = pixaGetPix(recog->pixa_u, i, L_CLONE)) == NULL) continue; pix2 = pixConvertTo32(pix1); ptaGetPt(recog->pta_u, i, &x, &y); pixRasterop(pix2, (l_int32)(x - 0.5), (l_int32)(y - 0.5), 3, 3, PIX_SRC, pixr, 0, 0); pixaAddPix(pixat, pix2, L_INSERT); pixDestroy(&pix1); } pix1 = pixaDisplayTiledInRows(pixat, 32, 3000, 1.0, 0, 20, 0); pixaAddPix(pixadb, pix1, L_INSERT); pixDisplay(pix1, 100, 100); pixaDestroy(&pixat); /* Scaled bitmaps */ pixat = pixaCreate(size); for (i = 0; i < size; i++) { if ((pix1 = pixaGetPix(recog->pixa, i, L_CLONE)) == NULL) continue; pix2 = pixConvertTo32(pix1); ptaGetPt(recog->pta, i, &x, &y); pixRasterop(pix2, (l_int32)(x - 0.5), (l_int32)(y - 0.5), 3, 3, PIX_SRC, pixr, 0, 0); pixaAddPix(pixat, pix2, L_INSERT); pixDestroy(&pix1); } pix1 = pixaDisplayTiledInRows(pixat, 32, 3000, 1.0, 0, 20, 0); pixaAddPix(pixadb, pix1, L_INSERT); pixDisplay(pix1, 100, 100); pixaDestroy(&pixat); pixDestroy(&pixr); recog->pixadb_ave = pixadb; return 0; } /*! * \brief pixDisplayOutliers() * * \param[in] pixas unscaled labeled templates * \param[in] nas scores of templates (against class averages) * \return pix tiled pixa with text and scores, or NULL on failure * *
* Notes: * (1) This debug routine is called from recogRemoveOutliers2(), * and takes the saved templates and their scores as input. **/ static PIX * pixDisplayOutliers(PIXA *pixas, NUMA *nas) { char *text; char buf[16]; l_int32 i, n; l_float32 fval; PIX *pix1, *pix2; PIXA *pixa1; PROCNAME("pixDisplayOutliers"); if (!pixas) return (PIX *)ERROR_PTR("pixas not defined", procName, NULL); if (!nas) return (PIX *)ERROR_PTR("nas not defined", procName, NULL); n = pixaGetCount(pixas); if (numaGetCount(nas) != n) return (PIX *)ERROR_PTR("pixas and nas sizes differ", procName, NULL); pixa1 = pixaCreate(n); for (i = 0; i < n; i++) { pix1 = pixaGetPix(pixas, i, L_CLONE); pix2 = pixAddBlackOrWhiteBorder(pix1, 25, 25, 0, 0, L_GET_WHITE_VAL); text = pixGetText(pix1); numaGetFValue(nas, i, &fval); snprintf(buf, sizeof(buf), "'%s': %5.2f", text, fval); pixSetText(pix2, buf); pixaAddPix(pixa1, pix2, L_INSERT); pixDestroy(&pix1); } pix1 = pixaDisplayTiledWithText(pixa1, 1500, 1.0, 20, 2, 6, 0xff000000); pixaDestroy(&pixa1); return pix1; } /*! * \brief recogDisplayOutlier() * * \param[in] recog * \param[in] iclass sample is in this class * \param[in] jsamp index of sample is class i * \param[in] maxclass index of class with closest average to sample * \param[in] maxscore score of sample with average of class %maxclass * \return pix sample and template images, with score, or NULL on error * *
* Notes: * (1) This shows three templates, side-by-side: * - The outlier sample * - The average template from the same class * - The average class template that best matched the outlier sample **/ static PIX * recogDisplayOutlier(L_RECOG *recog, l_int32 iclass, l_int32 jsamp, l_int32 maxclass, l_float32 maxscore) { char buf[64]; PIX *pix1, *pix2, *pix3, *pix4, *pix5; PIXA *pixa; PROCNAME("recogDisplayOutlier"); if (!recog) return (PIX *)ERROR_PTR("recog not defined", procName, NULL); pix1 = pixaaGetPix(recog->pixaa, iclass, jsamp, L_CLONE); pix2 = pixaGetPix(recog->pixa, iclass, L_CLONE); pix3 = pixaGetPix(recog->pixa, maxclass, L_CLONE); pixa = pixaCreate(3); pixaAddPix(pixa, pix1, L_INSERT); pixaAddPix(pixa, pix2, L_INSERT); pixaAddPix(pixa, pix3, L_INSERT); pix4 = pixaDisplayTiledInRows(pixa, 32, 400, 2.0, 0, 12, 2); snprintf(buf, sizeof(buf), "C=%d, BAC=%d, S=%4.2f", iclass, maxclass, maxscore); pix5 = pixAddSingleTextblock(pix4, recog->bmf, buf, 0xff000000, L_ADD_BELOW, NULL); pixDestroy(&pix4); pixaDestroy(&pixa); return pix5; } /*! * \brief recogShowMatchesInRange() * * \param[in] recog * \param[in] pixa of 1 bpp images to match * \param[in] minscore min score to include output * \param[in] maxscore max score to include output * \param[in] display 1 to display the result * \return 0 if OK, 1 on error * *
* Notes: * (1) This gives a visual output of the best matches for a given * range of scores. Each pair of images can optionally be * labeled with the index of the best match and the correlation. * (2) To use this, save a set of 1 bpp images (labeled or * unlabeled) that can be given to a recognizer in a pixa. * Then call this function with the pixa and parameters * to filter a range of scores. **/ l_ok recogShowMatchesInRange(L_RECOG *recog, PIXA *pixa, l_float32 minscore, l_float32 maxscore, l_int32 display) { l_int32 i, n, index, depth; l_float32 score; NUMA *nascore, *naindex; PIX *pix1, *pix2; PIXA *pixa1, *pixa2; PROCNAME("recogShowMatchesInRange"); if (!recog) return ERROR_INT("recog not defined", procName, 1); if (!pixa) return ERROR_INT("pixa not defined", procName, 1); /* Run the recognizer on the set of images */ n = pixaGetCount(pixa); nascore = numaCreate(n); naindex = numaCreate(n); pixa1 = pixaCreate(n); for (i = 0; i < n; i++) { pix1 = pixaGetPix(pixa, i, L_CLONE); recogIdentifyPix(recog, pix1, &pix2); rchExtract(recog->rch, &index, &score, NULL, NULL, NULL, NULL, NULL); numaAddNumber(nascore, score); numaAddNumber(naindex, index); pixaAddPix(pixa1, pix2, L_INSERT); pixDestroy(&pix1); } /* Filter the set and optionally add text to each */ pixa2 = pixaCreate(n); depth = 1; for (i = 0; i < n; i++) { numaGetFValue(nascore, i, &score); if (score < minscore || score > maxscore) continue; pix1 = pixaGetPix(pixa1, i, L_CLONE); numaGetIValue(naindex, i, &index); pix2 = recogShowMatch(recog, pix1, NULL, NULL, index, score); if (i == 0) depth = pixGetDepth(pix2); pixaAddPix(pixa2, pix2, L_INSERT); pixDestroy(&pix1); } /* Package it up */ pixDestroy(&recog->pixdb_range); if (pixaGetCount(pixa2) > 0) { recog->pixdb_range = pixaDisplayTiledInRows(pixa2, depth, 2500, 1.0, 0, 20, 1); if (display) pixDisplay(recog->pixdb_range, 300, 100); } else { L_INFO("no character matches in the range of scores\n", procName); } pixaDestroy(&pixa1); pixaDestroy(&pixa2); numaDestroy(&nascore); numaDestroy(&naindex); return 0; } /*! * \brief recogShowMatch() * * \param[in] recog * \param[in] pix1 input pix; several possibilities * \param[in] pix2 [optional] matching template * \param[in] box [optional] region in pix1 for which pix2 matches * \param[in] index index of matching template; use -1 to disable printing * \param[in] score score of match * \return pixd pair of images, showing input pix and best template, * optionally with matching information, or NULL on error. * *
* Notes: * (1) pix1 can be one of these: * (a) The input pix alone, which can be either a single character * (box == NULL) or several characters that need to be * segmented. If more than character is present, the box * region is displayed with an outline. * (b) Both the input pix and the matching template. In this case, * pix2 and box will both be null. * (2) If the bmf has been made (by a call to recogMakeBmf()) * and the index >= 0, the text field, match score and index * will be rendered; otherwise their values will be ignored. **/ PIX * recogShowMatch(L_RECOG *recog, PIX *pix1, PIX *pix2, BOX *box, l_int32 index, l_float32 score) { char buf[32]; char *text; L_BMF *bmf; PIX *pix3, *pix4, *pix5, *pixd; PIXA *pixa; PROCNAME("recogShowMatch"); if (!recog) return (PIX *)ERROR_PTR("recog not defined", procName, NULL); if (!pix1) return (PIX *)ERROR_PTR("pix1 not defined", procName, NULL); bmf = (recog->bmf && index >= 0) ? recog->bmf : NULL; if (!pix2 && !box && !bmf) /* nothing to do */ return pixCopy(NULL, pix1); pix3 = pixConvertTo32(pix1); if (box) pixRenderBoxArb(pix3, box, 1, 255, 0, 0); if (pix2) { pixa = pixaCreate(2); pixaAddPix(pixa, pix3, L_CLONE); pixaAddPix(pixa, pix2, L_CLONE); pix4 = pixaDisplayTiledInRows(pixa, 1, 500, 1.0, 0, 15, 0); pixaDestroy(&pixa); } else { pix4 = pixCopy(NULL, pix3); } pixDestroy(&pix3); if (bmf) { pix5 = pixAddBorderGeneral(pix4, 55, 55, 0, 0, 0xffffff00); recogGetClassString(recog, index, &text); snprintf(buf, sizeof(buf), "C=%s, S=%4.3f, I=%d", text, score, index); pixd = pixAddSingleTextblock(pix5, bmf, buf, 0xff000000, L_ADD_BELOW, NULL); pixDestroy(&pix5); LEPT_FREE(text); } else { pixd = pixClone(pix4); } pixDestroy(&pix4); return pixd; }