解决tesseract-ocr自动文本方向识别时内存泄漏的问题
This commit is contained in:
parent
a4e92b8d59
commit
97e5ff8c17
|
@ -97,62 +97,64 @@ static Pix* RemoveEnclosingCircle(Pix* pixs) {
|
||||||
* On return the blocks list owns all the constructed page layout.
|
* On return the blocks list owns all the constructed page layout.
|
||||||
*/
|
*/
|
||||||
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
|
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
|
||||||
Tesseract* osd_tess, OSResults* osr) {
|
Tesseract* osd_tess, OSResults* osr)
|
||||||
ASSERT_HOST(pix_binary_ != nullptr);
|
{
|
||||||
int width = pixGetWidth(pix_binary_);
|
ASSERT_HOST(pix_binary_ != nullptr);
|
||||||
int height = pixGetHeight(pix_binary_);
|
int width = pixGetWidth(pix_binary_);
|
||||||
// Get page segmentation mode.
|
int height = pixGetHeight(pix_binary_);
|
||||||
auto pageseg_mode = static_cast<PageSegMode>(
|
// Get page segmentation mode.
|
||||||
static_cast<int>(tessedit_pageseg_mode));
|
auto pageseg_mode = static_cast<PageSegMode>(static_cast<int>(tessedit_pageseg_mode));
|
||||||
// If a UNLV zone file can be found, use that instead of segmentation.
|
// If a UNLV zone file can be found, use that instead of segmentation.
|
||||||
if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&
|
if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != nullptr && input_file->length() > 0)
|
||||||
input_file != nullptr && input_file->length() > 0) {
|
{
|
||||||
STRING name = *input_file;
|
STRING name = *input_file;
|
||||||
const char* lastdot = strrchr(name.string(), '.');
|
const char* lastdot = strrchr(name.string(), '.');
|
||||||
if (lastdot != nullptr)
|
if (lastdot != nullptr)
|
||||||
name[lastdot - name.string()] = '\0';
|
name[lastdot - name.string()] = '\0';
|
||||||
read_unlv_file(name, width, height, blocks);
|
read_unlv_file(name, width, height, blocks);
|
||||||
}
|
}
|
||||||
if (blocks->empty()) {
|
|
||||||
// No UNLV file present. Work according to the PageSegMode.
|
if (blocks->empty())
|
||||||
// First make a single block covering the whole image.
|
{
|
||||||
BLOCK_IT block_it(blocks);
|
// No UNLV file present. Work according to the PageSegMode.
|
||||||
auto* block = new BLOCK("", true, 0, 0, 0, 0, width, height);
|
// First make a single block covering the whole image.
|
||||||
block->set_right_to_left(right_to_left());
|
BLOCK_IT block_it(blocks);
|
||||||
block_it.add_to_end(block);
|
auto* block = new BLOCK("", true, 0, 0, 0, 0, width, height);
|
||||||
} else {
|
block->set_right_to_left(right_to_left());
|
||||||
// UNLV file present. Use PSM_SINGLE_BLOCK.
|
block_it.add_to_end(block);
|
||||||
pageseg_mode = PSM_SINGLE_BLOCK;
|
}
|
||||||
}
|
else
|
||||||
// The diacritic_blobs holds noise blobs that may be diacritics. They
|
{
|
||||||
// are separated out on areas of the image that seem noisy and short-circuit
|
// UNLV file present. Use PSM_SINGLE_BLOCK.
|
||||||
// the layout process, going straight from the initial partition creation
|
pageseg_mode = PSM_SINGLE_BLOCK;
|
||||||
// right through to after word segmentation, where they are added to the
|
}
|
||||||
// rej_cblobs list of the most appropriate word. From there classification
|
// The diacritic_blobs holds noise blobs that may be diacritics. They
|
||||||
// will determine whether they are used.
|
// are separated out on areas of the image that seem noisy and short-circuit
|
||||||
BLOBNBOX_LIST diacritic_blobs;
|
// the layout process, going straight from the initial partition creation
|
||||||
int auto_page_seg_ret_val = 0;
|
// right through to after word segmentation, where they are added to the
|
||||||
TO_BLOCK_LIST to_blocks;
|
// rej_cblobs list of the most appropriate word. From there classification
|
||||||
if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||
|
// will determine whether they are used.
|
||||||
PSM_SPARSE(pageseg_mode)) {
|
BLOBNBOX_LIST diacritic_blobs;
|
||||||
auto_page_seg_ret_val = AutoPageSeg(
|
int auto_page_seg_ret_val = 0;
|
||||||
pageseg_mode, blocks, &to_blocks,
|
TO_BLOCK_LIST to_blocks;
|
||||||
enable_noise_removal ? &diacritic_blobs : nullptr, osd_tess, osr);
|
if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode))
|
||||||
if (pageseg_mode == PSM_OSD_ONLY)
|
{
|
||||||
return auto_page_seg_ret_val;
|
auto_page_seg_ret_val = AutoPageSeg(pageseg_mode, blocks, &to_blocks, enable_noise_removal ? &diacritic_blobs : nullptr, osd_tess, osr);
|
||||||
// To create blobs from the image region bounds uncomment this line:
|
if (pageseg_mode == PSM_OSD_ONLY)
|
||||||
// to_blocks.clear(); // Uncomment to go back to the old mode.
|
return auto_page_seg_ret_val;
|
||||||
} else {
|
// To create blobs from the image region bounds uncomment this line:
|
||||||
deskew_ = FCOORD(1.0f, 0.0f);
|
to_blocks.clear(); // Uncomment to go back to the old mode.
|
||||||
reskew_ = FCOORD(1.0f, 0.0f);
|
} else {
|
||||||
if (pageseg_mode == PSM_CIRCLE_WORD) {
|
deskew_ = FCOORD(1.0f, 0.0f);
|
||||||
Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
|
reskew_ = FCOORD(1.0f, 0.0f);
|
||||||
if (pixcleaned != nullptr) {
|
if (pageseg_mode == PSM_CIRCLE_WORD) {
|
||||||
pixDestroy(&pix_binary_);
|
Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
|
||||||
pix_binary_ = pixcleaned;
|
if (pixcleaned != nullptr) {
|
||||||
|
pixDestroy(&pix_binary_);
|
||||||
|
pix_binary_ = pixcleaned;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (auto_page_seg_ret_val < 0) {
|
if (auto_page_seg_ret_val < 0) {
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -213,6 +215,8 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
|
||||||
#if 1
|
#if 1
|
||||||
pixDestroy(&photomask_pix);
|
pixDestroy(&photomask_pix);
|
||||||
pixDestroy(&musicmask_pix);
|
pixDestroy(&musicmask_pix);
|
||||||
|
delete finder;
|
||||||
|
blocks->clear();
|
||||||
return 0;
|
return 0;
|
||||||
#else
|
#else
|
||||||
int result = 0;
|
int result = 0;
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue