解决 tesseract-ocr 自动文本方向识别时内存泄漏的问题

This commit is contained in:
luoliangyi 2023-07-12 17:30:46 +08:00
parent a4e92b8d59
commit 97e5ff8c17
5 changed files with 58 additions and 54 deletions

View File

@ -97,62 +97,64 @@ static Pix* RemoveEnclosingCircle(Pix* pixs) {
* On return the blocks list owns all the constructed page layout. * On return the blocks list owns all the constructed page layout.
*/ */
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
Tesseract* osd_tess, OSResults* osr) { Tesseract* osd_tess, OSResults* osr)
ASSERT_HOST(pix_binary_ != nullptr); {
int width = pixGetWidth(pix_binary_); ASSERT_HOST(pix_binary_ != nullptr);
int height = pixGetHeight(pix_binary_); int width = pixGetWidth(pix_binary_);
// Get page segmentation mode. int height = pixGetHeight(pix_binary_);
auto pageseg_mode = static_cast<PageSegMode>( // Get page segmentation mode.
static_cast<int>(tessedit_pageseg_mode)); auto pageseg_mode = static_cast<PageSegMode>(static_cast<int>(tessedit_pageseg_mode));
// If a UNLV zone file can be found, use that instead of segmentation. // If a UNLV zone file can be found, use that instead of segmentation.
if (!PSM_COL_FIND_ENABLED(pageseg_mode) && if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != nullptr && input_file->length() > 0)
input_file != nullptr && input_file->length() > 0) { {
STRING name = *input_file; STRING name = *input_file;
const char* lastdot = strrchr(name.string(), '.'); const char* lastdot = strrchr(name.string(), '.');
if (lastdot != nullptr) if (lastdot != nullptr)
name[lastdot - name.string()] = '\0'; name[lastdot - name.string()] = '\0';
read_unlv_file(name, width, height, blocks); read_unlv_file(name, width, height, blocks);
} }
if (blocks->empty()) {
// No UNLV file present. Work according to the PageSegMode. if (blocks->empty())
// First make a single block covering the whole image. {
BLOCK_IT block_it(blocks); // No UNLV file present. Work according to the PageSegMode.
auto* block = new BLOCK("", true, 0, 0, 0, 0, width, height); // First make a single block covering the whole image.
block->set_right_to_left(right_to_left()); BLOCK_IT block_it(blocks);
block_it.add_to_end(block); auto* block = new BLOCK("", true, 0, 0, 0, 0, width, height);
} else { block->set_right_to_left(right_to_left());
// UNLV file present. Use PSM_SINGLE_BLOCK. block_it.add_to_end(block);
pageseg_mode = PSM_SINGLE_BLOCK; }
} else
// The diacritic_blobs holds noise blobs that may be diacritics. They {
// are separated out on areas of the image that seem noisy and short-circuit // UNLV file present. Use PSM_SINGLE_BLOCK.
// the layout process, going straight from the initial partition creation pageseg_mode = PSM_SINGLE_BLOCK;
// right through to after word segmentation, where they are added to the }
// rej_cblobs list of the most appropriate word. From there classification // The diacritic_blobs holds noise blobs that may be diacritics. They
// will determine whether they are used. // are separated out on areas of the image that seem noisy and short-circuit
BLOBNBOX_LIST diacritic_blobs; // the layout process, going straight from the initial partition creation
int auto_page_seg_ret_val = 0; // right through to after word segmentation, where they are added to the
TO_BLOCK_LIST to_blocks; // rej_cblobs list of the most appropriate word. From there classification
if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) || // will determine whether they are used.
PSM_SPARSE(pageseg_mode)) { BLOBNBOX_LIST diacritic_blobs;
auto_page_seg_ret_val = AutoPageSeg( int auto_page_seg_ret_val = 0;
pageseg_mode, blocks, &to_blocks, TO_BLOCK_LIST to_blocks;
enable_noise_removal ? &diacritic_blobs : nullptr, osd_tess, osr); if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode))
if (pageseg_mode == PSM_OSD_ONLY) {
return auto_page_seg_ret_val; auto_page_seg_ret_val = AutoPageSeg(pageseg_mode, blocks, &to_blocks, enable_noise_removal ? &diacritic_blobs : nullptr, osd_tess, osr);
// To create blobs from the image region bounds uncomment this line: if (pageseg_mode == PSM_OSD_ONLY)
// to_blocks.clear(); // Uncomment to go back to the old mode. return auto_page_seg_ret_val;
} else { // To create blobs from the image region bounds uncomment this line:
deskew_ = FCOORD(1.0f, 0.0f); to_blocks.clear(); // Uncomment to go back to the old mode.
reskew_ = FCOORD(1.0f, 0.0f); } else {
if (pageseg_mode == PSM_CIRCLE_WORD) { deskew_ = FCOORD(1.0f, 0.0f);
Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_); reskew_ = FCOORD(1.0f, 0.0f);
if (pixcleaned != nullptr) { if (pageseg_mode == PSM_CIRCLE_WORD) {
pixDestroy(&pix_binary_); Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
pix_binary_ = pixcleaned; if (pixcleaned != nullptr) {
pixDestroy(&pix_binary_);
pix_binary_ = pixcleaned;
}
} }
} }
}
if (auto_page_seg_ret_val < 0) { if (auto_page_seg_ret_val < 0) {
return -1; return -1;
@ -213,6 +215,8 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
#if 1 #if 1
pixDestroy(&photomask_pix); pixDestroy(&photomask_pix);
pixDestroy(&musicmask_pix); pixDestroy(&musicmask_pix);
delete finder;
blocks->clear();
return 0; return 0;
#else #else
int result = 0; int result = 0;