/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *====================================================================*/ /*! * \file skew.c *
* * Top-level deskew interfaces * PIX *pixDeskewBoth() * PIX *pixDeskew() * PIX *pixFindSkewAndDeskew() * PIX *pixDeskewGeneral() * * Top-level angle-finding interface * l_int32 pixFindSkew() * * Basic angle-finding functions * l_int32 pixFindSkewSweep() * l_int32 pixFindSkewSweepAndSearch() * l_int32 pixFindSkewSweepAndSearchScore() * l_int32 pixFindSkewSweepAndSearchScorePivot() * * Search over arbitrary range of angles in orthogonal directions * l_int32 pixFindSkewOrthogonalRange() * * Differential square sum function for scoring * l_int32 pixFindDifferentialSquareSum() * * Measures of variance of row sums * l_int32 pixFindNormalizedSquareSum() * * * ============================================================== * Page skew detection * * Skew is determined by pixel profiles, which are computed * as pixel sums along the raster line for each line in the * image. By vertically shearing the image by a given angle, * the sums can be computed quickly along the raster lines * rather than along lines at that angle. The score is * computed from these line sums by taking the square of * the DIFFERENCE between adjacent line sums, summed over * all lines. The skew angle is then found as the angle * that maximizes the score. The actual computation for * any sheared image is done in the function * pixFindDifferentialSquareSum(). * * The search for the angle that maximizes this score is * most efficiently performed by first sweeping coarsely * over angles, using a significantly reduced image (say, 4x * reduction), to find the approximate maximum within a half * degree or so, and then doing an interval-halving binary * search at higher resolution to get the skew angle to * within 1/20 degree or better. * * The differential signal is used (rather than just using * that variance of line sums) because it rejects the * background noise due to total number of black pixels, * and has maximum contributions from the baselines and * x-height lines of text when the textlines are aligned * with the raster lines. It also works well in multicolumn * pages where the textlines do not line up across columns. * * The method is fast, accurate to within an angle (in radians) * of approximately the inverse width in pixels of the image, * and will work on a surprisingly small amount of text data * (just a couple of text lines). Consequently, it can * also be used to find local skew if the skew were to vary * significantly over the page. Local skew determination * is not very important except for locating lines of * handwritten text that may be mixed with printed text. **/ #include
* Notes: * (1) This binarizes if necessary and does both horizontal * and vertical deskewing, using the default parameters in * the underlying pixDeskew(). See usage there. * (2) This may return a clone. **/ PIX * pixDeskewBoth(PIX *pixs, l_int32 redsearch) { PIX *pix1, *pix2, *pix3, *pix4; PROCNAME("pixDeskewBoth"); if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); if (redsearch == 0) redsearch = DefaultBsReduction; else if (redsearch != 1 && redsearch != 2 && redsearch != 4) return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", procName, NULL); pix1 = pixDeskew(pixs, redsearch); pix2 = pixRotate90(pix1, 1); pix3 = pixDeskew(pix2, redsearch); pix4 = pixRotate90(pix3, -1); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); return pix4; } /*! * \brief pixDeskew() * * \param[in] pixs any depth * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; * use 0 for default * \return pixd deskewed pix, or NULL on error * *
* Notes: * (1) This binarizes if necessary and finds the skew angle. If the * angle is large enough and there is sufficient confidence, * it returns a deskewed image; otherwise, it returns a clone. * (2) Typical values at 300 ppi for %redsearch are 2 and 4. * At 75 ppi, one should use %redsearch = 1. **/ PIX * pixDeskew(PIX *pixs, l_int32 redsearch) { PROCNAME("pixDeskew"); if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); if (redsearch == 0) redsearch = DefaultBsReduction; else if (redsearch != 1 && redsearch != 2 && redsearch != 4) return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", procName, NULL); return pixDeskewGeneral(pixs, 0, 0.0, 0.0, redsearch, 0, NULL, NULL); } /*! * \brief pixFindSkewAndDeskew() * * \param[in] pixs any depth * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; * use 0 for default * \param[out] pangle [optional] angle required to deskew, * in degrees; use NULL to skip * \param[out] pconf [optional] conf value is ratio * of max/min scores; use NULL to skip * \return pixd deskewed pix, or NULL on error * *
* Notes: * (1) This binarizes if necessary and finds the skew angle. If the * angle is large enough and there is sufficient confidence, * it returns a deskewed image; otherwise, it returns a clone. **/ PIX * pixFindSkewAndDeskew(PIX *pixs, l_int32 redsearch, l_float32 *pangle, l_float32 *pconf) { PROCNAME("pixFindSkewAndDeskew"); if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); if (redsearch == 0) redsearch = DefaultBsReduction; else if (redsearch != 1 && redsearch != 2 && redsearch != 4) return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", procName, NULL); return pixDeskewGeneral(pixs, 0, 0.0, 0.0, redsearch, 0, pangle, pconf); } /*! * \brief pixDeskewGeneral() * * \param[in] pixs any depth * \param[in] redsweep for linear search: reduction factor = 1, 2 or 4; * use 0 for default * \param[in] sweeprange in degrees in each direction from 0; * use 0.0 for default * \param[in] sweepdelta in degrees; use 0.0 for default * \param[in] redsearch for binary search: reduction factor = 1, 2 or 4; * use 0 for default; * \param[in] thresh for binarizing the image; use 0 for default * \param[out] pangle [optional] angle required to deskew, * in degrees; use NULL to skip * \param[out] pconf [optional] conf value is ratio * of max/min scores; use NULL to skip * \return pixd deskewed pix, or NULL on error * *
* Notes: * (1) This binarizes if necessary and finds the skew angle. If the * angle is large enough and there is sufficient confidence, * it returns a deskewed image; otherwise, it returns a clone. **/ PIX * pixDeskewGeneral(PIX *pixs, l_int32 redsweep, l_float32 sweeprange, l_float32 sweepdelta, l_int32 redsearch, l_int32 thresh, l_float32 *pangle, l_float32 *pconf) { l_int32 ret, depth; l_float32 angle, conf, deg2rad; PIX *pixb, *pixd; PROCNAME("pixDeskewGeneral"); if (pangle) *pangle = 0.0; if (pconf) *pconf = 0.0; if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); if (redsweep == 0) redsweep = DefaultSweepReduction; else if (redsweep != 1 && redsweep != 2 && redsweep != 4) return (PIX *)ERROR_PTR("redsweep not in {1,2,4}", procName, NULL); if (sweeprange == 0.0) sweeprange = DefaultSweepRange; if (sweepdelta == 0.0) sweepdelta = DefaultSweepDelta; if (redsearch == 0) redsearch = DefaultBsReduction; else if (redsearch != 1 && redsearch != 2 && redsearch != 4) return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", procName, NULL); if (thresh == 0) thresh = DefaultBinaryThreshold; deg2rad = 3.1415926535 / 180.; /* Binarize if necessary */ depth = pixGetDepth(pixs); if (depth == 1) pixb = pixClone(pixs); else pixb = pixConvertTo1(pixs, thresh); /* Use the 1 bpp image to find the skew */ ret = pixFindSkewSweepAndSearch(pixb, &angle, &conf, redsweep, redsearch, sweeprange, sweepdelta, DefaultMinbsDelta); pixDestroy(&pixb); if (pangle) *pangle = angle; if (pconf) *pconf = conf; if (ret) return pixClone(pixs); if (L_ABS(angle) < MinDeskewAngle || conf < MinAllowedConfidence) return pixClone(pixs); if ((pixd = pixRotate(pixs, deg2rad * angle, L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, 0, 0)) == NULL) return pixClone(pixs); else return pixd; } /*-----------------------------------------------------------------------* * Simple top-level angle-finding interface * *-----------------------------------------------------------------------*/ /*! * \brief pixFindSkew() * * \param[in] pixs 1 bpp * \param[out] pangle angle required to deskew, in degrees * \param[out] pconf confidence value is ratio max/min scores * \return 0 if OK, 1 on error or if angle measurement not valid * *
* Notes: * (1) This is a simple high-level interface, that uses default * values of the parameters for reasonable speed and accuracy. * (2) The angle returned is the negative of the skew angle of * the image. It is the angle required for deskew. * Clockwise rotations are positive angles. **/ l_ok pixFindSkew(PIX *pixs, l_float32 *pangle, l_float32 *pconf) { PROCNAME("pixFindSkew"); if (pangle) *pangle = 0.0; if (pconf) *pconf = 0.0; if (!pangle || !pconf) return ERROR_INT("&angle and/or &conf not defined", procName, 1); if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (pixGetDepth(pixs) != 1) return ERROR_INT("pixs not 1 bpp", procName, 1); return pixFindSkewSweepAndSearch(pixs, pangle, pconf, DefaultSweepReduction, DefaultBsReduction, DefaultSweepRange, DefaultSweepDelta, DefaultMinbsDelta); } /*-----------------------------------------------------------------------* * Basic angle-finding functions * *-----------------------------------------------------------------------*/ /*! * \brief pixFindSkewSweep() * * \param[in] pixs 1 bpp * \param[out] pangle angle required to deskew, in degrees * \param[in] reduction factor = 1, 2, 4 or 8 * \param[in] sweeprange half the full range; assumed about 0; in degrees * \param[in] sweepdelta angle increment of sweep; in degrees * \return 0 if OK, 1 on error or if angle measurement not valid * *
* Notes: * (1) This examines the 'score' for skew angles with equal intervals. * (2) Caller must check the return value for validity of the result. **/ l_ok pixFindSkewSweep(PIX *pixs, l_float32 *pangle, l_int32 reduction, l_float32 sweeprange, l_float32 sweepdelta) { l_int32 ret, bzero, i, nangles; l_float32 deg2rad, theta; l_float32 sum, maxscore, maxangle; NUMA *natheta, *nascore; PIX *pix, *pixt; PROCNAME("pixFindSkewSweep"); if (!pangle) return ERROR_INT("&angle not defined", procName, 1); *pangle = 0.0; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (pixGetDepth(pixs) != 1) return ERROR_INT("pixs not 1 bpp", procName, 1); if (reduction != 1 && reduction != 2 && reduction != 4 && reduction != 8) return ERROR_INT("reduction must be in {1,2,4,8}", procName, 1); deg2rad = 3.1415926535 / 180.; ret = 0; /* Generate reduced image, if requested */ if (reduction == 1) pix = pixClone(pixs); else if (reduction == 2) pix = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); else if (reduction == 4) pix = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); else /* reduction == 8 */ pix = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); pixZero(pix, &bzero); if (bzero) { pixDestroy(&pix); return 1; } nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); natheta = numaCreate(nangles); nascore = numaCreate(nangles); pixt = pixCreateTemplate(pix); if (!pix || !pixt) { ret = ERROR_INT("pix and pixt not both made", procName, 1); goto cleanup; } if (!natheta || !nascore) { ret = ERROR_INT("natheta and nascore not both made", procName, 1); goto cleanup; } for (i = 0; i < nangles; i++) { theta = -sweeprange + i * sweepdelta; /* degrees */ /* Shear pix about the UL corner and put the result in pixt */ pixVShearCorner(pixt, pix, deg2rad * theta, L_BRING_IN_WHITE); /* Get score */ pixFindDifferentialSquareSum(pixt, &sum); #if DEBUG_PRINT_SCORES L_INFO("sum(%7.2f) = %7.0f\n", procName, theta, sum); #endif /* DEBUG_PRINT_SCORES */ /* Save the result in the output arrays */ numaAddNumber(nascore, sum); numaAddNumber(natheta, theta); } /* Find the location of the maximum (i.e., the skew angle) * by fitting the largest data point and its two neighbors * to a quadratic, using lagrangian interpolation. */ numaFitMax(nascore, &maxscore, natheta, &maxangle); *pangle = maxangle; #if DEBUG_PRINT_SWEEP L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", procName, maxangle, maxscore); #endif /* DEBUG_PRINT_SWEEP */ #if DEBUG_PLOT_SCORES /* Plot the result -- the scores versus rotation angle -- * using gnuplot with GPLOT_LINES (lines connecting data points). * The GPLOT data structure is first created, with the * appropriate data incorporated from the two input NUMAs, * and then the function gplotMakeOutput() uses gnuplot to * generate the output plot. This can be either a .png file * or a .ps file, depending on whether you use GPLOT_PNG * or GPLOT_PS. */ {GPLOT *gplot; gplot = gplotCreate("sweep_output", GPLOT_PNG, "Sweep. Variance of difference of ON pixels vs. angle", "angle (deg)", "score"); gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); gplotMakeOutput(gplot); gplotDestroy(&gplot); } #endif /* DEBUG_PLOT_SCORES */ cleanup: pixDestroy(&pix); pixDestroy(&pixt); numaDestroy(&nascore); numaDestroy(&natheta); return ret; } /*! * \brief pixFindSkewSweepAndSearch() * * \param[in] pixs 1 bpp * \param[out] pangle angle required to deskew; in degrees * \param[out] pconf confidence given by ratio of max/min score * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; * and must not exceed redsweep * \param[in] sweeprange half the full range, assumed about 0; in degrees * \param[in] sweepdelta angle increment of sweep; in degrees * \param[in] minbsdelta min binary search increment angle; in degrees * \return 0 if OK, 1 on error or if angle measurement not valid * *
* Notes: * (1) This finds the skew angle, doing first a sweep through a set * of equal angles, and then doing a binary search until * convergence. * (2) Caller must check the return value for validity of the result. * (3) In computing the differential line sum variance score, we sum * the result over scanlines, but we always skip: * ~ at least one scanline * ~ not more than 10% of the image height * ~ not more than 5% of the image width * (4) See also notes in pixFindSkewSweepAndSearchScore() **/ l_ok pixFindSkewSweepAndSearch(PIX *pixs, l_float32 *pangle, l_float32 *pconf, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta) { return pixFindSkewSweepAndSearchScore(pixs, pangle, pconf, NULL, redsweep, redsearch, 0.0, sweeprange, sweepdelta, minbsdelta); } /*! * \brief pixFindSkewSweepAndSearchScore() * * \param[in] pixs 1 bpp * \param[out] pangle angle required to deskew; in degrees * \param[out] pconf confidence given by ratio of max/min score * \param[out] pendscore [optional] max score; use NULL to ignore * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; * and must not exceed redsweep * \param[in] sweepcenter angle about which sweep is performed; in degrees * \param[in] sweeprange half the full range, taken about sweepcenter; * in degrees * \param[in] sweepdelta angle increment of sweep; in degrees * \param[in] minbsdelta min binary search increment angle; in degrees * \return 0 if OK, 1 on error or if angle measurement not valid * *
* Notes: * (1) This finds the skew angle, doing first a sweep through a set * of equal angles, and then doing a binary search until convergence. * (2) There are two built-in constants that determine if the * returned confidence is nonzero: * ~ MinValidMaxscore (minimum allowed maxscore) * ~ MinscoreThreshFactor (determines minimum allowed * minscore, by multiplying by (height * width^2) * If either of these conditions is not satisfied, the returned * confidence value will be zero. The maxscore is optionally * returned in this function to allow evaluation of the * resulting angle by a method that is independent of the * returned confidence value. * (3) The larger the confidence value, the greater the probability * that the proper alignment is given by the angle that maximizes * variance. It should be compared to a threshold, which depends * on the application. Values between 3.0 and 6.0 are common. * (4) By default, the shear is about the UL corner. **/ l_ok pixFindSkewSweepAndSearchScore(PIX *pixs, l_float32 *pangle, l_float32 *pconf, l_float32 *pendscore, l_int32 redsweep, l_int32 redsearch, l_float32 sweepcenter, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta) { return pixFindSkewSweepAndSearchScorePivot(pixs, pangle, pconf, pendscore, redsweep, redsearch, 0.0, sweeprange, sweepdelta, minbsdelta, L_SHEAR_ABOUT_CORNER); } /*! * \brief pixFindSkewSweepAndSearchScorePivot() * * \param[in] pixs 1 bpp * \param[out] pangle angle required to deskew; in degrees * \param[out] pconf confidence given by ratio of max/min score * \param[out] pendscore [optional] max score; use NULL to ignore * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; * and must not exceed redsweep * \param[in] sweepcenter angle about which sweep is performed; in degrees * \param[in] sweeprange half the full range, taken about sweepcenter; * in degrees * \param[in] sweepdelta angle increment of sweep; in degrees * \param[in] minbsdelta min binary search increment angle; in degrees * \param[in] pivot L_SHEAR_ABOUT_CORNER, L_SHEAR_ABOUT_CENTER * \return 0 if OK, 1 on error or if angle measurement not valid * *
* Notes: * (1) See notes in pixFindSkewSweepAndSearchScore(). * (2) This allows choice of shear pivoting from either the UL corner * or the center. For small angles, the ability to discriminate * angles is better with shearing from the UL corner. However, * for large angles (say, greater than 20 degrees), it is better * to shear about the center because a shear from the UL corner * loses too much of the image. **/ l_ok pixFindSkewSweepAndSearchScorePivot(PIX *pixs, l_float32 *pangle, l_float32 *pconf, l_float32 *pendscore, l_int32 redsweep, l_int32 redsearch, l_float32 sweepcenter, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, l_int32 pivot) { l_int32 ret, bzero, i, nangles, n, ratio, maxindex, minloc; l_int32 width, height; l_float32 deg2rad, theta, delta; l_float32 sum, maxscore, maxangle; l_float32 centerangle, leftcenterangle, rightcenterangle; l_float32 lefttemp, righttemp; l_float32 bsearchscore[5]; l_float32 minscore, minthresh; l_float32 rangeleft; NUMA *natheta, *nascore; PIX *pixsw, *pixsch, *pixt1, *pixt2; PROCNAME("pixFindSkewSweepAndSearchScorePivot"); if (pendscore) *pendscore = 0.0; if (pangle) *pangle = 0.0; if (pconf) *pconf = 0.0; if (!pangle || !pconf) return ERROR_INT("&angle and/or &conf not defined", procName, 1); if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); if (redsweep != 1 && redsweep != 2 && redsweep != 4 && redsweep != 8) return ERROR_INT("redsweep must be in {1,2,4,8}", procName, 1); if (redsearch != 1 && redsearch != 2 && redsearch != 4 && redsearch != 8) return ERROR_INT("redsearch must be in {1,2,4,8}", procName, 1); if (redsearch > redsweep) return ERROR_INT("redsearch must not exceed redsweep", procName, 1); if (pivot != L_SHEAR_ABOUT_CORNER && pivot != L_SHEAR_ABOUT_CENTER) return ERROR_INT("invalid pivot", procName, 1); deg2rad = 3.1415926535 / 180.; ret = 0; /* Generate reduced image for binary search, if requested */ if (redsearch == 1) pixsch = pixClone(pixs); else if (redsearch == 2) pixsch = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); else if (redsearch == 4) pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); else /* redsearch == 8 */ pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); pixZero(pixsch, &bzero); if (bzero) { pixDestroy(&pixsch); return 1; } /* Generate reduced image for sweep, if requested */ ratio = redsweep / redsearch; if (ratio == 1) { pixsw = pixClone(pixsch); } else { /* ratio > 1 */ if (ratio == 2) pixsw = pixReduceRankBinaryCascade(pixsch, 1, 0, 0, 0); else if (ratio == 4) pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 0, 0); else /* ratio == 8 */ pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 2, 0); } pixt1 = pixCreateTemplate(pixsw); if (ratio == 1) pixt2 = pixClone(pixt1); else pixt2 = pixCreateTemplate(pixsch); nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); natheta = numaCreate(nangles); nascore = numaCreate(nangles); if (!pixsch || !pixsw) { ret = ERROR_INT("pixsch and pixsw not both made", procName, 1); goto cleanup; } if (!pixt1 || !pixt2) { ret = ERROR_INT("pixt1 and pixt2 not both made", procName, 1); goto cleanup; } if (!natheta || !nascore) { ret = ERROR_INT("natheta and nascore not both made", procName, 1); goto cleanup; } /* Do sweep */ rangeleft = sweepcenter - sweeprange; for (i = 0; i < nangles; i++) { theta = rangeleft + i * sweepdelta; /* degrees */ /* Shear pix and put the result in pixt1 */ if (pivot == L_SHEAR_ABOUT_CORNER) pixVShearCorner(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); else pixVShearCenter(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); /* Get score */ pixFindDifferentialSquareSum(pixt1, &sum); #if DEBUG_PRINT_SCORES L_INFO("sum(%7.2f) = %7.0f\n", procName, theta, sum); #endif /* DEBUG_PRINT_SCORES */ /* Save the result in the output arrays */ numaAddNumber(nascore, sum); numaAddNumber(natheta, theta); } /* Find the largest of the set (maxscore at maxangle) */ numaGetMax(nascore, &maxscore, &maxindex); numaGetFValue(natheta, maxindex, &maxangle); #if DEBUG_PRINT_SWEEP L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", procName, maxangle, maxscore); #endif /* DEBUG_PRINT_SWEEP */ #if DEBUG_PLOT_SCORES /* Plot the sweep result -- the scores versus rotation angle -- * using gnuplot with GPLOT_LINES (lines connecting data points). */ {GPLOT *gplot; gplot = gplotCreate("sweep_output", GPLOT_PNG, "Sweep. Variance of difference of ON pixels vs. angle", "angle (deg)", "score"); gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); gplotMakeOutput(gplot); gplotDestroy(&gplot); } #endif /* DEBUG_PLOT_SCORES */ /* Check if the max is at the end of the sweep. */ n = numaGetCount(natheta); if (maxindex == 0 || maxindex == n - 1) { L_WARNING("max found at sweep edge\n", procName); goto cleanup; } /* Empty the numas for re-use */ numaEmpty(nascore); numaEmpty(natheta); /* Do binary search to find skew angle. * First, set up initial three points. */ centerangle = maxangle; if (pivot == L_SHEAR_ABOUT_CORNER) { pixVShearCorner(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); } else { pixVShearCenter(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); } numaAddNumber(nascore, bsearchscore[2]); numaAddNumber(natheta, centerangle); numaAddNumber(nascore, bsearchscore[0]); numaAddNumber(natheta, centerangle - sweepdelta); numaAddNumber(nascore, bsearchscore[4]); numaAddNumber(natheta, centerangle + sweepdelta); /* Start the search */ delta = 0.5 * sweepdelta; while (delta >= minbsdelta) { /* Get the left intermediate score */ leftcenterangle = centerangle - delta; if (pivot == L_SHEAR_ABOUT_CORNER) pixVShearCorner(pixt2, pixsch, deg2rad * leftcenterangle, L_BRING_IN_WHITE); else pixVShearCenter(pixt2, pixsch, deg2rad * leftcenterangle, L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[1]); numaAddNumber(nascore, bsearchscore[1]); numaAddNumber(natheta, leftcenterangle); /* Get the right intermediate score */ rightcenterangle = centerangle + delta; if (pivot == L_SHEAR_ABOUT_CORNER) pixVShearCorner(pixt2, pixsch, deg2rad * rightcenterangle, L_BRING_IN_WHITE); else pixVShearCenter(pixt2, pixsch, deg2rad * rightcenterangle, L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[3]); numaAddNumber(nascore, bsearchscore[3]); numaAddNumber(natheta, rightcenterangle); /* Find the maximum of the five scores and its location. * Note that the maximum must be in the center * three values, not in the end two. */ maxscore = bsearchscore[1]; maxindex = 1; for (i = 2; i < 4; i++) { if (bsearchscore[i] > maxscore) { maxscore = bsearchscore[i]; maxindex = i; } } /* Set up score array to interpolate for the next iteration */ lefttemp = bsearchscore[maxindex - 1]; righttemp = bsearchscore[maxindex + 1]; bsearchscore[2] = maxscore; bsearchscore[0] = lefttemp; bsearchscore[4] = righttemp; /* Get new center angle and delta for next iteration */ centerangle = centerangle + delta * (maxindex - 2); delta = 0.5 * delta; } *pangle = centerangle; #if DEBUG_PRINT_SCORES L_INFO(" Binary search score = %7.3f\n", procName, bsearchscore[2]); #endif /* DEBUG_PRINT_SCORES */ if (pendscore) /* save if requested */ *pendscore = bsearchscore[2]; /* Return the ratio of Max score over Min score * as a confidence value. Don't trust if the Min score * is too small, which can happen if the image is all black * with only a few white pixels interspersed. In that case, * we get a contribution from the top and bottom edges when * vertically sheared, but this contribution becomes zero when * the shear angle is zero. For zero shear angle, the only * contribution will be from the white pixels. We expect that * the signal goes as the product of the (height * width^2), * so we compute a (hopefully) normalized minimum threshold as * a function of these dimensions. */ numaGetMin(nascore, &minscore, &minloc); width = pixGetWidth(pixsch); height = pixGetHeight(pixsch); minthresh = MinscoreThreshFactor * width * width * height; #if DEBUG_THRESHOLD L_INFO(" minthresh = %10.2f, minscore = %10.2f\n", procName, minthresh, minscore); L_INFO(" maxscore = %10.2f\n", procName, maxscore); #endif /* DEBUG_THRESHOLD */ if (minscore > minthresh) *pconf = maxscore / minscore; else *pconf = 0.0; /* Don't trust it if too close to the edge of the sweep * range or if maxscore is small */ if ((centerangle > rangeleft + 2 * sweeprange - sweepdelta) || (centerangle < rangeleft + sweepdelta) || (maxscore < MinValidMaxscore)) *pconf = 0.0; #if DEBUG_PRINT_BINARY fprintf(stderr, "Binary search: angle = %7.3f, score ratio = %6.2f\n", *pangle, *pconf); fprintf(stderr, " max score = %8.0f\n", maxscore); #endif /* DEBUG_PRINT_BINARY */ #if DEBUG_PLOT_SCORES /* Plot the result -- the scores versus rotation angle -- * using gnuplot with GPLOT_POINTS. Because the data * points are not ordered by theta (increasing or decreasing), * using GPLOT_LINES would be confusing! */ {GPLOT *gplot; gplot = gplotCreate("search_output", GPLOT_PNG, "Binary search. Variance of difference of ON pixels vs. angle", "angle (deg)", "score"); gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot1"); gplotMakeOutput(gplot); gplotDestroy(&gplot); } #endif /* DEBUG_PLOT_SCORES */ cleanup: pixDestroy(&pixsw); pixDestroy(&pixsch); pixDestroy(&pixt1); pixDestroy(&pixt2); numaDestroy(&nascore); numaDestroy(&natheta); return ret; } /*---------------------------------------------------------------------* * Search over arbitrary range of angles in orthogonal directions * *---------------------------------------------------------------------*/ /* * \brief pixFindSkewOrthogonalRange() * * \param[in] pixs 1 bpp * \param[out] pangle angle required to deskew; in degrees cw * \param[out] pconf confidence given by ratio of max/min score * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; * and must not exceed redsweep * \param[in] sweeprange half the full range in each orthogonal * direction, taken about 0, in degrees * \param[in] sweepdelta angle increment of sweep; in degrees * \param[in] minbsdelta min binary search increment angle; in degrees * \param[in] confprior amount by which confidence of 90 degree rotated * result is reduced when comparing with unrotated * confidence value * \return 0 if OK, 1 on error or if angle measurement not valid * *
* Notes: * (1) This searches for the skew angle, first in the range * [-sweeprange, sweeprange], and then in * [90 - sweeprange, 90 + sweeprange], with angles measured * clockwise. For exploring the full range of possibilities, * suggest using sweeprange = 47.0 degrees, giving some overlap * at 45 and 135 degrees. From these results, and discounting * the the second confidence by %confprior, it selects the * angle for maximal differential variance. If the angle * is larger than pi/4, the angle found after 90 degree rotation * is selected. * (2) The larger the confidence value, the greater the probability * that the proper alignment is given by the angle that maximizes * variance. It should be compared to a threshold, which depends * on the application. Values between 3.0 and 6.0 are common. * (3) Allowing for both portrait and landscape searches is more * difficult, because if the signal from the text lines is weak, * a signal from vertical rules can be larger! * The most difficult documents to deskew have some or all of: * (a) Multiple columns, not aligned * (b) Black lines along the vertical edges * (c) Text from two pages, and at different angles * Rule of thumb for resolution: * (a) If the margins are clean, you can work at 75 ppi, * although 100 ppi is safer. * (b) If there are vertical lines in the margins, do not * work below 150 ppi. The signal from the text lines must * exceed that from the margin lines. * (4) Choosing the %confprior parameter depends on knowing something * about the source of image. However, we're not using * real probabilities here, so its use is qualitative. * If landscape and portrait are equally likely, use * %confprior = 0.0. If the likelihood of portrait (non-rotated) * is 100 times higher than that of landscape, we want to reduce * the chance that we rotate to landscape in a situation where * the landscape signal is accidentally larger than the * portrait signal. To do this use a positive value of * %confprior; say 1.5. **/ l_int32 pixFindSkewOrthogonalRange(PIX *pixs, l_float32 *pangle, l_float32 *pconf, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, l_float32 confprior) { l_float32 angle1, conf1, score1, angle2, conf2, score2; PIX *pixr; PROCNAME("pixFindSkewOrthogonalRange"); if (pangle) *pangle = 0.0; if (pconf) *pconf = 0.0; if (!pangle || !pconf) return ERROR_INT("&angle and/or &conf not defined", procName, 1); if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); pixFindSkewSweepAndSearchScorePivot(pixs, &angle1, &conf1, &score1, redsweep, redsearch, 0.0, sweeprange, sweepdelta, minbsdelta, L_SHEAR_ABOUT_CORNER); pixr = pixRotateOrth(pixs, 1); pixFindSkewSweepAndSearchScorePivot(pixr, &angle2, &conf2, &score2, redsweep, redsearch, 0.0, sweeprange, sweepdelta, minbsdelta, L_SHEAR_ABOUT_CORNER); pixDestroy(&pixr); if (conf1 > conf2 - confprior) { *pangle = angle1; *pconf = conf1; } else { *pangle = -90.0 + angle2; *pconf = conf2; } #if DEBUG_PRINT_ORTH fprintf(stderr, " About 0: angle1 = %7.3f, conf1 = %7.3f, score1 = %f\n", angle1, conf1, score1); fprintf(stderr, " About 90: angle2 = %7.3f, conf2 = %7.3f, score2 = %f\n", angle2, conf2, score2); fprintf(stderr, " Final: angle = %7.3f, conf = %7.3f\n", *pangle, *pconf); #endif /* DEBUG_PRINT_ORTH */ return 0; } /*----------------------------------------------------------------* * Differential square sum function * *----------------------------------------------------------------*/ /*! * \brief pixFindDifferentialSquareSum() * * \param[in] pixs * \param[out] psum result * \return 0 if OK, 1 on error * *
* Notes: * (1) At the top and bottom, we skip: * ~ at least one scanline * ~ not more than 10% of the image height * ~ not more than 5% of the image width **/ l_ok pixFindDifferentialSquareSum(PIX *pixs, l_float32 *psum) { l_int32 i, n; l_int32 w, h, skiph, skip, nskip; l_float32 val1, val2, diff, sum; NUMA *na; PROCNAME("pixFindDifferentialSquareSum"); if (!psum) return ERROR_INT("&sum not defined", procName, 1); *psum = 0.0; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); /* Generate a number array consisting of the sum * of pixels in each row of pixs */ if ((na = pixCountPixelsByRow(pixs, NULL)) == NULL) return ERROR_INT("na not made", procName, 1); /* Compute the number of rows at top and bottom to omit. * We omit these to avoid getting a spurious signal from * the top and bottom of a (nearly) all black image. */ w = pixGetWidth(pixs); h = pixGetHeight(pixs); skiph = (l_int32)(0.05 * w); /* skip for max shear of 0.025 radians */ skip = L_MIN(h / 10, skiph); /* don't remove more than 10% of image */ nskip = L_MAX(skip / 2, 1); /* at top & bot; skip at least one line */ /* Sum the squares of differential row sums, on the * allowed rows. Note that nskip must be >= 1. */ n = numaGetCount(na); sum = 0.0; for (i = nskip; i < n - nskip; i++) { numaGetFValue(na, i - 1, &val1); numaGetFValue(na, i, &val2); diff = val2 - val1; sum += diff * diff; } numaDestroy(&na); *psum = sum; return 0; } /*----------------------------------------------------------------* * Normalized square sum * *----------------------------------------------------------------*/ /*! * \brief pixFindNormalizedSquareSum() * * \param[in] pixs * \param[out] phratio [optional] ratio of normalized horiz square sum * to result if the pixel distribution were uniform * \param[out] pvratio [optional] ratio of normalized vert square sum * to result if the pixel distribution were uniform * \param[out] pfract [optional] ratio of fg pixels to total pixels * \return 0 if OK, 1 on error or if there are no fg pixels * *
* Notes: * (1) Let the image have h scanlines and N fg pixels. * If the pixels were uniformly distributed on scanlines, * the sum of squares of fg pixels on each scanline would be * h * (N / h)^2. However, if the pixels are not uniformly * distributed (e.g., for text), the sum of squares of fg * pixels will be larger. We return in hratio and vratio the * ratio of these two values. * (2) If there are no fg pixels, hratio and vratio are returned as 0.0. **/ l_ok pixFindNormalizedSquareSum(PIX *pixs, l_float32 *phratio, l_float32 *pvratio, l_float32 *pfract) { l_int32 i, w, h, empty; l_float32 sum, sumsq, uniform, val; NUMA *na; PIX *pixt; PROCNAME("pixFindNormalizedSquareSum"); if (phratio) *phratio = 0.0; if (pvratio) *pvratio = 0.0; if (pfract) *pfract = 0.0; if (!phratio && !pvratio) return ERROR_INT("nothing to do", procName, 1); if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); pixGetDimensions(pixs, &w, &h, NULL); empty = 0; if (phratio) { na = pixCountPixelsByRow(pixs, NULL); numaGetSum(na, &sum); /* fg pixels */ if (pfract) *pfract = sum / (l_float32)(w * h); if (sum != 0.0) { uniform = sum * sum / h; /* h*(sum / h)^2 */ sumsq = 0.0; for (i = 0; i < h; i++) { numaGetFValue(na, i, &val); sumsq += val * val; } *phratio = sumsq / uniform; } else { empty = 1; } numaDestroy(&na); } if (pvratio) { if (empty == 1) return 1; pixt = pixRotateOrth(pixs, 1); na = pixCountPixelsByRow(pixt, NULL); numaGetSum(na, &sum); if (pfract) *pfract = sum / (l_float32)(w * h); if (sum != 0.0) { uniform = sum * sum / w; sumsq = 0.0; for (i = 0; i < w; i++) { numaGetFValue(na, i, &val); sumsq += val * val; } *pvratio = sumsq / uniform; } else { empty = 1; } pixDestroy(&pixt); numaDestroy(&na); } return empty; }