Merge branch 'master' of http://192.168.10.5:8099/sane/code_app
This commit is contained in:
commit
e51f0f52a2
|
@ -56,6 +56,8 @@ HGBase_DisableInfo
|
|||
HGBase_WriteInfo
|
||||
|
||||
HGBase_GetLocalTime
|
||||
HGBase_GetTickCount
|
||||
HGBase_GetIntervalSeconds
|
||||
|
||||
HGBase_GetTmpPath
|
||||
HGBase_GetCurrentDir
|
||||
|
|
|
@ -104,6 +104,22 @@ BOOL CHGTestDlg::OnInitDialog()
|
|||
SetIcon(m_hIcon, TRUE); // 设置大图标
|
||||
SetIcon(m_hIcon, FALSE); // 设置小图标
|
||||
|
||||
HGImage image = NULL;
|
||||
HGImgFmt_LoadImage("D:\\2.jpg", 0, NULL, 0, 0, &image);
|
||||
if (NULL != image)
|
||||
{
|
||||
HGOCRMgr ocrMgr = NULL;
|
||||
HGImgProc_CreateOCRMgr(HGIMGPROC_OCRALGO_TESSERACT, &ocrMgr);
|
||||
if (NULL != ocrMgr)
|
||||
{
|
||||
HGUInt direct = 0;
|
||||
HGImgProc_ImageTextDirectOCR(ocrMgr, image, &direct);
|
||||
HGImgProc_DestroyOCRMgr(ocrMgr);
|
||||
}
|
||||
|
||||
HGBase_DestroyImage(image);
|
||||
}
|
||||
|
||||
HGTwain_LoadDSM(&m_dsm);
|
||||
HGTwain_OpenDSM(m_dsm, m_hWnd, DSEventCallback, this);
|
||||
|
||||
|
|
|
@ -34,4 +34,53 @@ HGResult HGAPI HGBase_GetLocalTime(HGTimeInfo* timeInfo)
|
|||
timeInfo->milliseconds = time.tv_usec / 1000;
|
||||
#endif
|
||||
return HGBASE_ERR_OK;
|
||||
}
|
||||
|
||||
/* Reads the current value of a high-resolution tick counter.
 *
 * tickCount: receives the counter value; must not be NULL.
 *
 * With HG_CMP_MSC the value is the raw QueryPerformanceCounter reading;
 * otherwise it is the CLOCK_MONOTONIC time expressed in nanoseconds.
 * Use HGBase_GetIntervalSeconds to turn two readings into seconds.
 *
 * Returns HGBASE_ERR_INVALIDARG for a NULL pointer, HGBASE_ERR_FAIL when the
 * underlying clock query fails, HGBASE_ERR_OK otherwise.
 */
HGResult HGAPI HGBase_GetTickCount(HGULonglong* tickCount)
{
	if (!tickCount)
		return HGBASE_ERR_INVALIDARG;

#if defined(HG_CMP_MSC)
	LARGE_INTEGER counter;
	if (!QueryPerformanceCounter(&counter))
		return HGBASE_ERR_FAIL;

	*tickCount = counter.QuadPart;
#else
	struct timespec now;
	if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
		return HGBASE_ERR_FAIL;

	/* Fold seconds and nanoseconds into one nanosecond count. */
	const HGULonglong nanosPerSecond = (HGULonglong)1000000000;
	HGULonglong ticks = (HGULonglong)now.tv_sec * nanosPerSecond;
	ticks += (HGULonglong)now.tv_nsec;
	*tickCount = ticks;
#endif
	return HGBASE_ERR_OK;
}
|
||||
|
||||
/* Converts the distance between two tick readings into seconds.
 *
 * startTickCount / stopTickCount: readings in the unit produced by
 *   HGBase_GetTickCount; stop must not be smaller than start.
 * seconds: receives the elapsed time; must not be NULL.
 *
 * With HG_CMP_MSC the tick difference is divided by the
 * QueryPerformanceFrequency value; otherwise ticks are treated as
 * nanoseconds.
 *
 * Returns HGBASE_ERR_INVALIDARG for a NULL pointer or a reversed interval,
 * HGBASE_ERR_FAIL when the frequency query fails, HGBASE_ERR_OK otherwise.
 */
HGResult HGAPI HGBase_GetIntervalSeconds(HGULonglong startTickCount, HGULonglong stopTickCount, HGDouble* seconds)
{
	if (NULL == seconds || stopTickCount < startTickCount)
		return HGBASE_ERR_INVALIDARG;

	const HGULonglong elapsedTicks = stopTickCount - startTickCount;

#if defined(HG_CMP_MSC)
	LARGE_INTEGER ticksPerSecond;
	if (!QueryPerformanceFrequency(&ticksPerSecond))
		return HGBASE_ERR_FAIL;

	*seconds = (HGDouble)elapsedTicks / (HGDouble)ticksPerSecond.QuadPart;
#else
	*seconds = (HGDouble)elapsedTicks / (HGDouble)1000000000.0;
#endif
	return HGBASE_ERR_OK;
}
|
|
@ -23,4 +23,10 @@ typedef struct
|
|||
|
||||
HGEXPORT HGResult HGAPI HGBase_GetLocalTime(HGTimeInfo *timeInfo);
|
||||
|
||||
/* Read the current high-resolution tick count into *tickCount. The unit is
 * platform dependent; pass two readings to HGBase_GetIntervalSeconds to get
 * elapsed seconds. tickCount must not be NULL. */
|
||||
HGEXPORT HGResult HGAPI HGBase_GetTickCount(HGULonglong* tickCount);
|
||||
|
||||
/* Convert the interval between two HGBase_GetTickCount readings into seconds.
 * stopTickCount must be >= startTickCount and seconds must not be NULL. */
|
||||
HGEXPORT HGResult HGAPI HGBase_GetIntervalSeconds(HGULonglong startTickCount, HGULonglong stopTickCount, HGDouble* seconds);
|
||||
|
||||
#endif /* __HGTIME_H__ */
|
|
@ -2,6 +2,7 @@
|
|||
#include "HGOCR.h"
|
||||
#include "HGOCRRetImpl.hpp"
|
||||
#include "HGOCRRetImpl.hpp"
|
||||
#include "../base/HGTime.h"
|
||||
#include "../base/HGUtility.h"
|
||||
#include "../base/HGInfo.h"
|
||||
#include "../imgfmt/HGBmp.h"
|
||||
|
@ -40,7 +41,7 @@ HGResult HGOCRTesseract::Init()
|
|||
HGBase_GetFilePath(moduleName, dataPath, 256);
|
||||
strcat(dataPath, "tessdata");
|
||||
|
||||
int rc = TessBaseAPIInit3(m_baseApi, dataPath, "chi_sim");
|
||||
int rc = TessBaseAPIInit3(m_baseApi, dataPath, "osd");
|
||||
if (0 != rc)
|
||||
{
|
||||
HGBase_WriteInfo(HGBASE_INFOTYPE_ERROR, "HGOCRTesseract::Init: TessBaseAPIInit3 fail");
|
||||
|
@ -171,7 +172,15 @@ HGResult HGOCRTesseract::ImageTextDirectOCR(HGImage image, HGUInt* direct)
|
|||
HGBase_GetImageDpi(image2, &xDpi, &yDpi);
|
||||
TessBaseAPISetSourceResolution(m_baseApi, (xDpi + yDpi) / 2);
|
||||
|
||||
HGULonglong tickStart = 0;
|
||||
HGBase_GetTickCount(&tickStart);
|
||||
int orientation = MyOSD(m_baseApi);
|
||||
HGULonglong tickEnd = 0;
|
||||
HGBase_GetTickCount(&tickEnd);
|
||||
HGDouble seconds = 0.0;
|
||||
HGBase_GetIntervalSeconds(tickStart, tickEnd, &seconds);
|
||||
HGBase_WriteInfo(HGBASE_INFOTYPE_DEBUG, "HGOCRTesseract::ImageTextDirectOCR seconds:%fs", seconds);
|
||||
|
||||
if (TessOrientation::ORIENTATION_PAGE_UP == orientation)
|
||||
*direct = HGIMGPROC_OCRTEXTDIRECT_ORI;
|
||||
else if (TessOrientation::ORIENTATION_PAGE_RIGHT == orientation)
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,579 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: osdetect.cpp
|
||||
// Description: Orientation and script detection.
|
||||
// Author: Samuel Charron
|
||||
// Ranjith Unnikrishnan
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath> // for std::fabs
|
||||
#include <memory>
|
||||
|
||||
#include "osdetect.h"
|
||||
|
||||
#include "blobbox.h"
|
||||
#include "blread.h"
|
||||
#include "colfind.h"
|
||||
#include "fontinfo.h"
|
||||
#include "imagefind.h"
|
||||
#include "linefind.h"
|
||||
#include "oldlist.h"
|
||||
#include "qrsequence.h"
|
||||
#include "ratngs.h"
|
||||
#include "strngs.h"
|
||||
#include "tabvector.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "textord.h"
|
||||
|
||||
const float kSizeRatioToReject = 2.0;
|
||||
const int kMinAcceptableBlobHeight = 10;
|
||||
|
||||
const float kScriptAcceptRatio = 1.3;
|
||||
|
||||
const float kHanRatioInKorean = 0.7;
|
||||
const float kHanRatioInJapanese = 0.3;
|
||||
|
||||
const float kNonAmbiguousMargin = 1.0;
|
||||
|
||||
// General scripts
|
||||
static const char* han_script = "Han";
|
||||
static const char* latin_script = "Latin";
|
||||
static const char* katakana_script = "Katakana";
|
||||
static const char* hiragana_script = "Hiragana";
|
||||
static const char* hangul_script = "Hangul";
|
||||
|
||||
// Pseudo-scripts Name
|
||||
const char* ScriptDetector::korean_script_ = "Korean";
|
||||
const char* ScriptDetector::japanese_script_ = "Japanese";
|
||||
const char* ScriptDetector::fraktur_script_ = "Fraktur";
|
||||
|
||||
void OSResults::update_best_orientation() {
|
||||
float first = orientations[0];
|
||||
float second = orientations[1];
|
||||
best_result.orientation_id = 0;
|
||||
if (orientations[0] < orientations[1]) {
|
||||
first = orientations[1];
|
||||
second = orientations[0];
|
||||
best_result.orientation_id = 1;
|
||||
}
|
||||
for (int i = 2; i < 4; ++i) {
|
||||
if (orientations[i] > first) {
|
||||
second = first;
|
||||
first = orientations[i];
|
||||
best_result.orientation_id = i;
|
||||
} else if (orientations[i] > second) {
|
||||
second = orientations[i];
|
||||
}
|
||||
}
|
||||
// Store difference of top two orientation scores.
|
||||
best_result.oconfidence = first - second;
|
||||
}
|
||||
|
||||
// Overrides the best orientation with orientation_id and resets the
// orientation confidence to zero.
void OSResults::set_best_orientation(int orientation_id) {
  best_result.oconfidence = 0;
  best_result.orientation_id = orientation_id;
}
|
||||
|
||||
void OSResults::update_best_script(int orientation) {
|
||||
// We skip index 0 to ignore the "Common" script.
|
||||
float first = scripts_na[orientation][1];
|
||||
float second = scripts_na[orientation][2];
|
||||
best_result.script_id = 1;
|
||||
if (scripts_na[orientation][1] < scripts_na[orientation][2]) {
|
||||
first = scripts_na[orientation][2];
|
||||
second = scripts_na[orientation][1];
|
||||
best_result.script_id = 2;
|
||||
}
|
||||
for (int i = 3; i < kMaxNumberOfScripts; ++i) {
|
||||
if (scripts_na[orientation][i] > first) {
|
||||
best_result.script_id = i;
|
||||
second = first;
|
||||
first = scripts_na[orientation][i];
|
||||
} else if (scripts_na[orientation][i] > second) {
|
||||
second = scripts_na[orientation][i];
|
||||
}
|
||||
}
|
||||
best_result.sconfidence = (second == 0.0f) ? 2.0f :
|
||||
(first / second - 1.0) / (kScriptAcceptRatio - 1.0);
|
||||
}
|
||||
|
||||
int OSResults::get_best_script(int orientation_id) const {
|
||||
int max_id = -1;
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
|
||||
const char *script = unicharset->get_script_from_script_id(j);
|
||||
if (strcmp(script, "Common") && strcmp(script, "NULL")) {
|
||||
if (max_id == -1 ||
|
||||
scripts_na[orientation_id][j] > scripts_na[orientation_id][max_id])
|
||||
max_id = j;
|
||||
}
|
||||
}
|
||||
return max_id;
|
||||
}
|
||||
|
||||
// Print the script scores for all possible orientations.
|
||||
void OSResults::print_scores(void) const {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
tprintf("Orientation id #%d", i);
|
||||
print_scores(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Print the script scores for the given candidate orientation.
|
||||
void OSResults::print_scores(int orientation_id) const {
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
|
||||
if (scripts_na[orientation_id][j]) {
|
||||
tprintf("%12s\t: %f\n", unicharset->get_script_from_script_id(j),
|
||||
scripts_na[orientation_id][j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Accumulate scores with given OSResults instance and update the best script.
|
||||
void OSResults::accumulate(const OSResults& osr) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
orientations[i] += osr.orientations[i];
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j)
|
||||
scripts_na[i][j] += osr.scripts_na[i][j];
|
||||
}
|
||||
unicharset = osr.unicharset;
|
||||
update_best_orientation();
|
||||
update_best_script(best_result.orientation_id);
|
||||
}
|
||||
|
||||
// Detect and erase horizontal/vertical lines and picture regions from the
|
||||
// image, so that non-text blobs are removed from consideration.
|
||||
static void remove_nontext_regions(tesseract::Tesseract *tess,
|
||||
BLOCK_LIST *blocks,
|
||||
TO_BLOCK_LIST *to_blocks) {
|
||||
Pix *pix = tess->pix_binary();
|
||||
ASSERT_HOST(pix != nullptr);
|
||||
int vertical_x = 0;
|
||||
int vertical_y = 1;
|
||||
tesseract::TabVector_LIST v_lines;
|
||||
tesseract::TabVector_LIST h_lines;
|
||||
int resolution;
|
||||
if (kMinCredibleResolution > pixGetXRes(pix)) {
|
||||
resolution = kMinCredibleResolution;
|
||||
tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n",
|
||||
pixGetXRes(pix), resolution);
|
||||
} else {
|
||||
resolution = pixGetXRes(pix);
|
||||
}
|
||||
|
||||
tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix,
|
||||
&vertical_x, &vertical_y,
|
||||
nullptr, &v_lines, &h_lines);
|
||||
Pix* im_pix = tesseract::ImageFind::FindImages(pix, nullptr);
|
||||
if (im_pix != nullptr) {
|
||||
pixSubtract(pix, pix, im_pix);
|
||||
pixDestroy(&im_pix);
|
||||
}
|
||||
tess->mutable_textord()->find_components(tess->pix_binary(),
|
||||
blocks, to_blocks);
|
||||
}
|
||||
|
||||
// Find connected components in the page and process a subset until finished or
|
||||
// a stopping criterion is met.
|
||||
// Returns the number of blobs used in making the estimate. 0 implies failure.
|
||||
int orientation_and_script_detection(STRING& filename,
|
||||
OSResults* osr,
|
||||
tesseract::Tesseract* tess) {
|
||||
STRING name = filename; //truncated name
|
||||
const char *lastdot; //of name
|
||||
TBOX page_box;
|
||||
|
||||
lastdot = strrchr (name.string (), '.');
|
||||
if (lastdot != nullptr)
|
||||
name[lastdot-name.string()] = '\0';
|
||||
|
||||
ASSERT_HOST(tess->pix_binary() != nullptr);
|
||||
int width = pixGetWidth(tess->pix_binary());
|
||||
int height = pixGetHeight(tess->pix_binary());
|
||||
|
||||
BLOCK_LIST blocks;
|
||||
if (!read_unlv_file(name, width, height, &blocks))
|
||||
FullPageBlock(width, height, &blocks);
|
||||
|
||||
// Try to remove non-text regions from consideration.
|
||||
TO_BLOCK_LIST land_blocks, port_blocks;
|
||||
remove_nontext_regions(tess, &blocks, &port_blocks);
|
||||
|
||||
if (port_blocks.empty()) {
|
||||
// page segmentation did not succeed, so we need to find_components first.
|
||||
tess->mutable_textord()->find_components(tess->pix_binary(),
|
||||
&blocks, &port_blocks);
|
||||
} else {
|
||||
page_box.set_left(0);
|
||||
page_box.set_bottom(0);
|
||||
page_box.set_right(width);
|
||||
page_box.set_top(height);
|
||||
// Filter_blobs sets up the TO_BLOCKs the same as find_components does.
|
||||
tess->mutable_textord()->filter_blobs(page_box.topright(),
|
||||
&port_blocks, true);
|
||||
}
|
||||
|
||||
return os_detect(&port_blocks, osr, tess);
|
||||
}
|
||||
|
||||
// Filter and sample the blobs.
|
||||
// Returns a non-zero number of blobs if the page was successfully processed, or
|
||||
// zero if the page had too few characters to be reliable
|
||||
int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr,
|
||||
tesseract::Tesseract* tess) {
|
||||
int blobs_total = 0;
|
||||
TO_BLOCK_IT block_it;
|
||||
block_it.set_to_list(port_blocks);
|
||||
|
||||
BLOBNBOX_CLIST filtered_list;
|
||||
BLOBNBOX_C_IT filtered_it(&filtered_list);
|
||||
|
||||
for (block_it.mark_cycle_pt(); !block_it.cycled_list();
|
||||
block_it.forward ()) {
|
||||
TO_BLOCK* to_block = block_it.data();
|
||||
if (to_block->block->pdblk.poly_block() &&
|
||||
!to_block->block->pdblk.poly_block()->IsText()) continue;
|
||||
BLOBNBOX_IT bbox_it;
|
||||
bbox_it.set_to_list(&to_block->blobs);
|
||||
for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list ();
|
||||
bbox_it.forward ()) {
|
||||
BLOBNBOX* bbox = bbox_it.data();
|
||||
C_BLOB* blob = bbox->cblob();
|
||||
TBOX box = blob->bounding_box();
|
||||
++blobs_total;
|
||||
|
||||
// Catch illegal value of box width and avoid division by zero.
|
||||
if (box.width() == 0) continue;
|
||||
// TODO: Can height and width be negative? If not, remove fabs.
|
||||
float y_x = std::fabs((box.height() * 1.0f) / box.width());
|
||||
float x_y = 1.0f / y_x;
|
||||
// Select a >= 1.0 ratio
|
||||
float ratio = x_y > y_x ? x_y : y_x;
|
||||
// Blob is ambiguous
|
||||
if (ratio > kSizeRatioToReject) continue;
|
||||
if (box.height() < kMinAcceptableBlobHeight) continue;
|
||||
filtered_it.add_to_end(bbox);
|
||||
}
|
||||
}
|
||||
return os_detect_blobs(nullptr, &filtered_list, osr, tess);
|
||||
}
|
||||
|
||||
// Detect orientation and script from a list of blobs.
|
||||
// Returns a non-zero number of blobs if the list was successfully processed, or
|
||||
// zero if the list had too few characters to be reliable.
|
||||
// If allowed_scripts is non-null and non-empty, it is a list of scripts that
|
||||
// constrains both orientation and script detection to consider only scripts
|
||||
// from the list.
|
||||
int os_detect_blobs(const GenericVector<int>* allowed_scripts,
|
||||
BLOBNBOX_CLIST* blob_list, OSResults* osr,
|
||||
tesseract::Tesseract* tess) {
|
||||
OSResults osr_;
|
||||
int minCharactersToTry = tess->min_characters_to_try;
|
||||
int maxCharactersToTry = /*5 * minCharactersToTry*/ 150;
|
||||
if (osr == nullptr)
|
||||
osr = &osr_;
|
||||
|
||||
osr->unicharset = &tess->unicharset;
|
||||
OrientationDetector o(allowed_scripts, osr);
|
||||
ScriptDetector s(allowed_scripts, osr, tess);
|
||||
|
||||
BLOBNBOX_C_IT filtered_it(blob_list);
|
||||
int real_max = std::min(filtered_it.length(), maxCharactersToTry);
|
||||
// tprintf("Total blobs found = %d\n", blobs_total);
|
||||
// tprintf("Number of blobs post-filtering = %d\n", filtered_it.length());
|
||||
// tprintf("Number of blobs to try = %d\n", real_max);
|
||||
|
||||
// If there are too few characters, skip this page entirely.
|
||||
if (real_max < minCharactersToTry / /*2*/ 3) {
|
||||
tprintf("Too few characters. Skipping this page\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto** blobs = new BLOBNBOX*[filtered_it.length()];
|
||||
int number_of_blobs = 0;
|
||||
for (filtered_it.mark_cycle_pt (); !filtered_it.cycled_list ();
|
||||
filtered_it.forward ()) {
|
||||
blobs[number_of_blobs++] = filtered_it.data();
|
||||
}
|
||||
QRSequenceGenerator sequence(number_of_blobs);
|
||||
int num_blobs_evaluated = 0;
|
||||
for (int i = 0; i < real_max; ++i) {
|
||||
if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess)
|
||||
&& i > minCharactersToTry) {
|
||||
break;
|
||||
}
|
||||
++num_blobs_evaluated;
|
||||
}
|
||||
delete [] blobs;
|
||||
|
||||
// Make sure the best_result is up-to-date
|
||||
int orientation = o.get_orientation();
|
||||
osr->update_best_script(orientation);
|
||||
return num_blobs_evaluated;
|
||||
}
|
||||
|
||||
// Processes a single blob to estimate script and orientation.
|
||||
// Return true if estimate of orientation and script satisfies stopping
|
||||
// criteria.
|
||||
bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
|
||||
ScriptDetector* s, OSResults* osr,
|
||||
tesseract::Tesseract* tess) {
|
||||
tess->tess_cn_matching.set_value(true); // turn it on
|
||||
tess->tess_bn_matching.set_value(false);
|
||||
C_BLOB* blob = bbox->cblob();
|
||||
TBLOB* tblob = TBLOB::PolygonalCopy(tess->poly_allow_detailed_fx, blob);
|
||||
TBOX box = tblob->bounding_box();
|
||||
FCOORD current_rotation(1.0f, 0.0f);
|
||||
FCOORD rotation90(0.0f, 1.0f);
|
||||
BLOB_CHOICE_LIST ratings[4];
|
||||
// Test the 4 orientations
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
// Normalize the blob. Set the origin to the place we want to be the
|
||||
// bottom-middle after rotation.
|
||||
// Scaling is to make the rotated height the x-height.
|
||||
float scaling = static_cast<float>(kBlnXHeight) / box.height();
|
||||
float x_origin = (box.left() + box.right()) / 2.0f;
|
||||
float y_origin = (box.bottom() + box.top()) / 2.0f;
|
||||
if (i == 0 || i == 2) {
|
||||
// Rotation is 0 or 180.
|
||||
y_origin = i == 0 ? box.bottom() : box.top();
|
||||
} else {
|
||||
// Rotation is 90 or 270.
|
||||
scaling = static_cast<float>(kBlnXHeight) / box.width();
|
||||
x_origin = i == 1 ? box.left() : box.right();
|
||||
}
|
||||
std::unique_ptr<TBLOB> rotated_blob(new TBLOB(*tblob));
|
||||
rotated_blob->Normalize(nullptr, ¤t_rotation, nullptr,
|
||||
x_origin, y_origin, scaling, scaling,
|
||||
0.0f, static_cast<float>(kBlnBaselineOffset),
|
||||
false, nullptr);
|
||||
tess->AdaptiveClassifier(rotated_blob.get(), ratings + i);
|
||||
current_rotation.rotate(rotation90);
|
||||
}
|
||||
delete tblob;
|
||||
|
||||
bool stop = o->detect_blob(ratings);
|
||||
s->detect_blob(ratings);
|
||||
int orientation = o->get_orientation();
|
||||
stop = s->must_stop(orientation) && stop;
|
||||
return stop;
|
||||
}
|
||||
|
||||
|
||||
// Stores the results object to accumulate into and the optional list of
// allowed scripts. Neither pointer is copied or owned here.
OrientationDetector::OrientationDetector(
    const GenericVector<int>* allowed_scripts, OSResults* osr) {
  allowed_scripts_ = allowed_scripts;
  osr_ = osr;
}
|
||||
|
||||
// Score the given blob and return true if it is now sure of the orientation
|
||||
// after adding this block.
|
||||
bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
|
||||
float blob_o_score[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
float total_blob_o_score = 0.0f;
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
BLOB_CHOICE_IT choice_it(scores + i);
|
||||
if (!choice_it.empty()) {
|
||||
BLOB_CHOICE* choice = nullptr;
|
||||
if (allowed_scripts_ != nullptr && !allowed_scripts_->empty()) {
|
||||
// Find the top choice in an allowed script.
|
||||
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list() &&
|
||||
choice == nullptr; choice_it.forward()) {
|
||||
int choice_script = choice_it.data()->script_id();
|
||||
int s = 0;
|
||||
for (s = 0; s < allowed_scripts_->size(); ++s) {
|
||||
if ((*allowed_scripts_)[s] == choice_script) {
|
||||
choice = choice_it.data();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
choice = choice_it.data();
|
||||
}
|
||||
if (choice != nullptr) {
|
||||
// The certainty score ranges between [-20,0]. This is converted here to
|
||||
// [0,1], with 1 indicating best match.
|
||||
blob_o_score[i] = 1 + 0.05 * choice->certainty();
|
||||
total_blob_o_score += blob_o_score[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
if (total_blob_o_score == 0.0) return false;
|
||||
// Fill in any blanks with the worst score of the others. This is better than
|
||||
// picking an arbitrary probability for it and way better than -inf.
|
||||
float worst_score = 0.0f;
|
||||
int num_good_scores = 0;
|
||||
for (float f : blob_o_score) {
|
||||
if (f > 0.0f) {
|
||||
++num_good_scores;
|
||||
if (worst_score == 0.0f || f < worst_score)
|
||||
worst_score = f;
|
||||
}
|
||||
}
|
||||
if (num_good_scores == 1) {
|
||||
// Lower worst if there is only one.
|
||||
worst_score /= 2.0f;
|
||||
}
|
||||
for (float& f : blob_o_score) {
|
||||
if (f == 0.0f) {
|
||||
f = worst_score;
|
||||
total_blob_o_score += worst_score;
|
||||
}
|
||||
}
|
||||
// Normalize the orientation scores for the blob and use them to
|
||||
// update the aggregated orientation score.
|
||||
for (int i = 0; total_blob_o_score != 0 && i < 4; ++i) {
|
||||
osr_->orientations[i] += log(blob_o_score[i] / total_blob_o_score);
|
||||
}
|
||||
|
||||
// TODO(ranjith) Add an early exit test, based on min_orientation_margin,
|
||||
// as used in pagesegmain.cpp.
|
||||
return false;
|
||||
}
|
||||
|
||||
int OrientationDetector::get_orientation() {
|
||||
osr_->update_best_orientation();
|
||||
return osr_->best_result.orientation_id;
|
||||
}
|
||||
|
||||
|
||||
// Stores the collaborators and registers every script name this detector
// refers to with tess's unicharset, caching the ids that add_script returns.
// The registration order below is kept fixed on purpose.
ScriptDetector::ScriptDetector(const GenericVector<int>* allowed_scripts,
                               OSResults* osr, tesseract::Tesseract* tess) {
  allowed_scripts_ = allowed_scripts;
  osr_ = osr;
  tess_ = tess;

  auto& charset = tess_->unicharset;
  katakana_id_ = charset.add_script(katakana_script);
  hiragana_id_ = charset.add_script(hiragana_script);
  han_id_ = charset.add_script(han_script);
  hangul_id_ = charset.add_script(hangul_script);
  japanese_id_ = charset.add_script(japanese_script_);
  korean_id_ = charset.add_script(korean_script_);
  latin_id_ = charset.add_script(latin_script);
  fraktur_id_ = charset.add_script(fraktur_script_);
}
|
||||
|
||||
|
||||
// Score the given blob and return true if it is now sure of the script after
|
||||
// adding this blob.
|
||||
void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
bool done[kMaxNumberOfScripts] = { false };
|
||||
|
||||
BLOB_CHOICE_IT choice_it;
|
||||
choice_it.set_to_list(scores + i);
|
||||
|
||||
float prev_score = -1;
|
||||
int script_count = 0;
|
||||
int prev_id = -1;
|
||||
int prev_fontinfo_id = -1;
|
||||
const char* prev_unichar = "";
|
||||
const char* unichar = "";
|
||||
|
||||
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
|
||||
choice_it.forward()) {
|
||||
BLOB_CHOICE* choice = choice_it.data();
|
||||
int id = choice->script_id();
|
||||
if (allowed_scripts_ != nullptr && !allowed_scripts_->empty()) {
|
||||
// Check that the choice is in an allowed script.
|
||||
int s = 0;
|
||||
for (s = 0; s < allowed_scripts_->size(); ++s) {
|
||||
if ((*allowed_scripts_)[s] == id) break;
|
||||
}
|
||||
if (s == allowed_scripts_->size()) continue; // Not found in list.
|
||||
}
|
||||
// Script already processed before.
|
||||
if (done[id]) continue;
|
||||
done[id] = true;
|
||||
|
||||
unichar = tess_->unicharset.id_to_unichar(choice->unichar_id());
|
||||
// Save data from the first match
|
||||
if (prev_score < 0) {
|
||||
prev_score = -choice->certainty();
|
||||
script_count = 1;
|
||||
prev_id = id;
|
||||
prev_unichar = unichar;
|
||||
prev_fontinfo_id = choice->fontinfo_id();
|
||||
} else if (-choice->certainty() < prev_score + kNonAmbiguousMargin) {
|
||||
++script_count;
|
||||
}
|
||||
|
||||
if (strlen(prev_unichar) == 1)
|
||||
if (unichar[0] >= '0' && unichar[0] <= '9')
|
||||
break;
|
||||
|
||||
// if script_count is >= 2, character is ambiguous, skip other matches
|
||||
// since they are useless.
|
||||
if (script_count >= 2)
|
||||
break;
|
||||
}
|
||||
// Character is non ambiguous
|
||||
if (script_count == 1) {
|
||||
// Update the score of the winning script
|
||||
osr_->scripts_na[i][prev_id] += 1.0;
|
||||
|
||||
// Workaround for Fraktur
|
||||
if (prev_id == latin_id_) {
|
||||
if (prev_fontinfo_id >= 0) {
|
||||
const tesseract::FontInfo &fi =
|
||||
tess_->get_fontinfo_table().get(prev_fontinfo_id);
|
||||
//printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name,
|
||||
// fi.is_italic(), fi.is_bold(), fi.is_fixed_pitch(),
|
||||
// fi.is_serif(), fi.is_fraktur(),
|
||||
// prev_unichar);
|
||||
if (fi.is_fraktur()) {
|
||||
osr_->scripts_na[i][prev_id] -= 1.0;
|
||||
osr_->scripts_na[i][fraktur_id_] += 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update Japanese / Korean pseudo-scripts
|
||||
if (prev_id == katakana_id_)
|
||||
osr_->scripts_na[i][japanese_id_] += 1.0;
|
||||
if (prev_id == hiragana_id_)
|
||||
osr_->scripts_na[i][japanese_id_] += 1.0;
|
||||
if (prev_id == hangul_id_)
|
||||
osr_->scripts_na[i][korean_id_] += 1.0;
|
||||
if (prev_id == han_id_) {
|
||||
osr_->scripts_na[i][korean_id_] += kHanRatioInKorean;
|
||||
osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese;
|
||||
}
|
||||
}
|
||||
} // iterate over each orientation
|
||||
}
|
||||
|
||||
// Refreshes the best script for the given orientation and reports whether
// its confidence is above 1.
bool ScriptDetector::must_stop(int orientation) {
  osr_->update_best_script(orientation);
  const bool confident = osr_->best_result.sconfidence > 1;
  return confident;
}
|
||||
|
||||
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be
// applied for the text to be upright (readable).
// Ids 0..3 map to 0, 270, 180 and 90; any other id yields -1.
int OrientationIdToValue(const int& id) {
  static const int kDegreesById[4] = {0, 270, 180, 90};
  if (id < 0 || id > 3) {
    return -1;
  }
  return kDegreesById[id];
}
|
|
@ -0,0 +1,415 @@
|
|||
/**********************************************************************
|
||||
* File: pagesegmain.cpp
|
||||
* Description: Top-level page segmenter for Tesseract.
|
||||
* Author: Ray Smith
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef unlink
|
||||
#include <io.h>
|
||||
#endif
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif // _WIN32
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "allheaders.h"
|
||||
#include "blobbox.h"
|
||||
#include "blread.h"
|
||||
#include "colfind.h"
|
||||
#include "debugpixa.h"
|
||||
#include "equationdetect.h"
|
||||
#include "imagefind.h"
|
||||
#include "linefind.h"
|
||||
#include "makerow.h"
|
||||
#include "osdetect.h"
|
||||
#include "tabvector.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tessvars.h"
|
||||
#include "textord.h"
|
||||
#include "tordmain.h"
|
||||
#include "wordseg.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Exclusive upper bound of the erosion loop in RemoveEnclosingCircle: the
// loop index runs from 1 to kMaxCircleErosions - 1, so at most that many
// erosions are performed.
|
||||
const int kMaxCircleErosions = 8;
|
||||
|
||||
// Helper to remove an enclosing circle from an image.
|
||||
// If there isn't one, then the image will most likely get badly mangled.
|
||||
// The returned pix must be pixDestroyed after use. nullptr may be returned
|
||||
// if the image doesn't meet the trivial conditions that it uses to determine
|
||||
// success.
|
||||
static Pix* RemoveEnclosingCircle(Pix* pixs) {
|
||||
Pix* pixsi = pixInvert(nullptr, pixs);
|
||||
Pix* pixc = pixCreateTemplate(pixs);
|
||||
pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET);
|
||||
pixSeedfillBinary(pixc, pixc, pixsi, 4);
|
||||
pixInvert(pixc, pixc);
|
||||
pixDestroy(&pixsi);
|
||||
Pix* pixt = pixAnd(nullptr, pixs, pixc);
|
||||
l_int32 max_count;
|
||||
pixCountConnComp(pixt, 8, &max_count);
|
||||
// The count has to go up before we start looking for the minimum.
|
||||
l_int32 min_count = INT32_MAX;
|
||||
Pix* pixout = nullptr;
|
||||
for (int i = 1; i < kMaxCircleErosions; i++) {
|
||||
pixDestroy(&pixt);
|
||||
pixErodeBrick(pixc, pixc, 3, 3);
|
||||
pixt = pixAnd(nullptr, pixs, pixc);
|
||||
l_int32 count;
|
||||
pixCountConnComp(pixt, 8, &count);
|
||||
if (i == 1 || count > max_count) {
|
||||
max_count = count;
|
||||
min_count = count;
|
||||
} else if (i > 1 && count < min_count) {
|
||||
min_count = count;
|
||||
pixDestroy(&pixout);
|
||||
pixout = pixCopy(nullptr, pixt); // Save the best.
|
||||
} else if (count >= min_count) {
|
||||
break; // We have passed by the best.
|
||||
}
|
||||
}
|
||||
pixDestroy(&pixt);
|
||||
pixDestroy(&pixc);
|
||||
return pixout;
|
||||
}
|
||||
|
||||
/**
|
||||
* Segment the page according to the current value of tessedit_pageseg_mode.
|
||||
* pix_binary_ is used as the source image and should not be nullptr.
|
||||
* On return the blocks list owns all the constructed page layout.
|
||||
*/
|
||||
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
|
||||
Tesseract* osd_tess, OSResults* osr) {
|
||||
ASSERT_HOST(pix_binary_ != nullptr);
|
||||
int width = pixGetWidth(pix_binary_);
|
||||
int height = pixGetHeight(pix_binary_);
|
||||
// Get page segmentation mode.
|
||||
auto pageseg_mode = static_cast<PageSegMode>(
|
||||
static_cast<int>(tessedit_pageseg_mode));
|
||||
// If a UNLV zone file can be found, use that instead of segmentation.
|
||||
if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&
|
||||
input_file != nullptr && input_file->length() > 0) {
|
||||
STRING name = *input_file;
|
||||
const char* lastdot = strrchr(name.string(), '.');
|
||||
if (lastdot != nullptr)
|
||||
name[lastdot - name.string()] = '\0';
|
||||
read_unlv_file(name, width, height, blocks);
|
||||
}
|
||||
if (blocks->empty()) {
|
||||
// No UNLV file present. Work according to the PageSegMode.
|
||||
// First make a single block covering the whole image.
|
||||
BLOCK_IT block_it(blocks);
|
||||
auto* block = new BLOCK("", true, 0, 0, 0, 0, width, height);
|
||||
block->set_right_to_left(right_to_left());
|
||||
block_it.add_to_end(block);
|
||||
} else {
|
||||
// UNLV file present. Use PSM_SINGLE_BLOCK.
|
||||
pageseg_mode = PSM_SINGLE_BLOCK;
|
||||
}
|
||||
// The diacritic_blobs holds noise blobs that may be diacritics. They
|
||||
// are separated out on areas of the image that seem noisy and short-circuit
|
||||
// the layout process, going straight from the initial partition creation
|
||||
// right through to after word segmentation, where they are added to the
|
||||
// rej_cblobs list of the most appropriate word. From there classification
|
||||
// will determine whether they are used.
|
||||
BLOBNBOX_LIST diacritic_blobs;
|
||||
int auto_page_seg_ret_val = 0;
|
||||
TO_BLOCK_LIST to_blocks;
|
||||
if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||
|
||||
PSM_SPARSE(pageseg_mode)) {
|
||||
auto_page_seg_ret_val = AutoPageSeg(
|
||||
pageseg_mode, blocks, &to_blocks,
|
||||
enable_noise_removal ? &diacritic_blobs : nullptr, osd_tess, osr);
|
||||
if (pageseg_mode == PSM_OSD_ONLY)
|
||||
return auto_page_seg_ret_val;
|
||||
// To create blobs from the image region bounds uncomment this line:
|
||||
// to_blocks.clear(); // Uncomment to go back to the old mode.
|
||||
} else {
|
||||
deskew_ = FCOORD(1.0f, 0.0f);
|
||||
reskew_ = FCOORD(1.0f, 0.0f);
|
||||
if (pageseg_mode == PSM_CIRCLE_WORD) {
|
||||
Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
|
||||
if (pixcleaned != nullptr) {
|
||||
pixDestroy(&pix_binary_);
|
||||
pix_binary_ = pixcleaned;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (auto_page_seg_ret_val < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (blocks->empty()) {
|
||||
if (textord_debug_tabfind)
|
||||
tprintf("Empty page\n");
|
||||
return 0; // AutoPageSeg found an empty page.
|
||||
}
|
||||
bool splitting =
|
||||
pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
|
||||
bool cjk_mode = textord_use_cjk_fp_model;
|
||||
|
||||
textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
|
||||
pix_thresholds_, pix_grey_, splitting || cjk_mode,
|
||||
&diacritic_blobs, blocks, &to_blocks);
|
||||
return auto_page_seg_ret_val;
|
||||
}
|
||||
|
||||
/**
|
||||
* Auto page segmentation. Divide the page image into blocks of uniform
|
||||
* text linespacing and images.
|
||||
*
|
||||
* Resolution (in ppi) is derived from the input image.
|
||||
*
|
||||
* The output goes in the blocks list with corresponding TO_BLOCKs in the
|
||||
* to_blocks list.
|
||||
*
|
||||
* If !PSM_COL_FIND_ENABLED(pageseg_mode), then no attempt is made to divide
|
||||
* the image into columns, but multiple blocks are still made if the text is
|
||||
* of non-uniform linespacing.
|
||||
*
|
||||
* If diacritic_blobs is non-null, then diacritics/noise blobs, that would
|
||||
* confuse layout analysis by causing textline overlap, are placed there,
|
||||
* with the expectation that they will be reassigned to words later and
|
||||
* noise/diacriticness determined via classification.
|
||||
*
|
||||
* If osd (orientation and script detection) is true then that is performed
|
||||
* as well. If only_osd is true, then only orientation and script detection is
|
||||
* performed. If osd is desired, (osd or only_osd) then osr_tess must be
|
||||
* another Tesseract that was initialized especially for osd, and the results
|
||||
* will be output into osr (orientation and script result).
|
||||
*/
|
||||
int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
|
||||
TO_BLOCK_LIST* to_blocks,
|
||||
BLOBNBOX_LIST* diacritic_blobs, Tesseract* osd_tess,
|
||||
OSResults* osr) {
|
||||
Pix* photomask_pix = nullptr;
|
||||
Pix* musicmask_pix = nullptr;
|
||||
// The blocks made by the ColumnFinder. Moved to blocks before return.
|
||||
BLOCK_LIST found_blocks;
|
||||
TO_BLOCK_LIST temp_blocks;
|
||||
|
||||
ColumnFinder* finder = SetupPageSegAndDetectOrientation(
|
||||
pageseg_mode, blocks, osd_tess, osr, &temp_blocks, &photomask_pix,
|
||||
pageseg_apply_music_mask ? &musicmask_pix : nullptr);
|
||||
#if 1
|
||||
pixDestroy(&photomask_pix);
|
||||
pixDestroy(&musicmask_pix);
|
||||
return 0;
|
||||
#else
|
||||
int result = 0;
|
||||
if (finder != nullptr) {
|
||||
TO_BLOCK_IT to_block_it(&temp_blocks);
|
||||
TO_BLOCK* to_block = to_block_it.data();
|
||||
if (musicmask_pix != nullptr) {
|
||||
// TODO(rays) pass the musicmask_pix into FindBlocks and mark music
|
||||
// blocks separately. For now combine with photomask_pix.
|
||||
pixOr(photomask_pix, photomask_pix, musicmask_pix);
|
||||
}
|
||||
if (equ_detect_) {
|
||||
finder->SetEquationDetect(equ_detect_);
|
||||
}
|
||||
result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_,
|
||||
to_block, photomask_pix, pix_thresholds_,
|
||||
pix_grey_, &pixa_debug_, &found_blocks,
|
||||
diacritic_blobs, to_blocks);
|
||||
if (result >= 0)
|
||||
finder->GetDeskewVectors(&deskew_, &reskew_);
|
||||
delete finder;
|
||||
}
|
||||
pixDestroy(&photomask_pix);
|
||||
pixDestroy(&musicmask_pix);
|
||||
if (result < 0) return result;
|
||||
|
||||
blocks->clear();
|
||||
BLOCK_IT block_it(blocks);
|
||||
// Move the found blocks to the input/output blocks.
|
||||
block_it.add_list_after(&found_blocks);
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Helper adds all the scripts from sid_set converted to ids from osd_set to
|
||||
// allowed_ids.
|
||||
static void AddAllScriptsConverted(const UNICHARSET& sid_set,
|
||||
const UNICHARSET& osd_set,
|
||||
GenericVector<int>* allowed_ids) {
|
||||
for (int i = 0; i < sid_set.get_script_table_size(); ++i) {
|
||||
if (i != sid_set.null_sid()) {
|
||||
const char* script = sid_set.get_script_from_script_id(i);
|
||||
allowed_ids->push_back(osd_set.get_script_id_from_name(script));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets up auto page segmentation, determines the orientation, and corrects it.
|
||||
* Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to
|
||||
* facilitate testing.
|
||||
* photo_mask_pix is a pointer to a nullptr pointer that will be filled on return
|
||||
* with the leptonica photo mask, which must be pixDestroyed by the caller.
|
||||
* to_blocks is an empty list that will be filled with (usually a single)
|
||||
* block that is used during layout analysis. This ugly API is required
|
||||
* because of the possibility of a unlv zone file.
|
||||
* TODO(rays) clean this up.
|
||||
* See AutoPageSeg for other arguments.
|
||||
* The returned ColumnFinder must be deleted after use.
|
||||
*/
|
||||
ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
|
||||
PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
|
||||
OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
|
||||
Pix** music_mask_pix) {
|
||||
int vertical_x = 0;
|
||||
int vertical_y = 1;
|
||||
TabVector_LIST v_lines;
|
||||
TabVector_LIST h_lines;
|
||||
ICOORD bleft(0, 0);
|
||||
|
||||
ASSERT_HOST(pix_binary_ != nullptr);
|
||||
if (tessedit_dump_pageseg_images) {
|
||||
pixa_debug_.AddPix(pix_binary_, "PageSegInput");
|
||||
}
|
||||
// Leptonica is used to find the rule/separator lines in the input.
|
||||
LineFinder::FindAndRemoveLines(source_resolution_,
|
||||
textord_tabfind_show_vlines, pix_binary_,
|
||||
&vertical_x, &vertical_y, music_mask_pix,
|
||||
&v_lines, &h_lines);
|
||||
if (tessedit_dump_pageseg_images) {
|
||||
pixa_debug_.AddPix(pix_binary_, "NoLines");
|
||||
}
|
||||
// Leptonica is used to find a mask of the photo regions in the input.
|
||||
*photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
|
||||
if (tessedit_dump_pageseg_images) {
|
||||
pixa_debug_.AddPix(pix_binary_, "NoImages");
|
||||
}
|
||||
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();
|
||||
|
||||
// The rest of the algorithm uses the usual connected components.
|
||||
textord_.find_components(pix_binary_, blocks, to_blocks);
|
||||
|
||||
TO_BLOCK_IT to_block_it(to_blocks);
|
||||
// There must be exactly one input block.
|
||||
// TODO(rays) handle new textline finding with a UNLV zone file.
|
||||
ASSERT_HOST(to_blocks->singleton());
|
||||
TO_BLOCK* to_block = to_block_it.data();
|
||||
TBOX blkbox = to_block->block->pdblk.bounding_box();
|
||||
ColumnFinder* finder = nullptr;
|
||||
int estimated_resolution = source_resolution_;
|
||||
if (source_resolution_ == kMinCredibleResolution) {
|
||||
// Try to estimate resolution from typical body text size.
|
||||
int res = IntCastRounded(to_block->line_size * kResolutionEstimationFactor);
|
||||
if (res > estimated_resolution && res < kMaxCredibleResolution) {
|
||||
estimated_resolution = res;
|
||||
tprintf("Estimating resolution as %d\n", estimated_resolution);
|
||||
}
|
||||
}
|
||||
|
||||
if (to_block->line_size >= 2) {
|
||||
finder = new ColumnFinder(static_cast<int>(to_block->line_size),
|
||||
blkbox.botleft(), blkbox.topright(),
|
||||
estimated_resolution, textord_use_cjk_fp_model,
|
||||
textord_tabfind_aligned_gap_fraction, &v_lines,
|
||||
&h_lines, vertical_x, vertical_y);
|
||||
|
||||
finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
if (equ_detect_) {
|
||||
equ_detect_->LabelSpecialText(to_block);
|
||||
}
|
||||
|
||||
BLOBNBOX_CLIST osd_blobs;
|
||||
// osd_orientation is the number of 90 degree rotations to make the
|
||||
// characters upright. (See osdetect.h for precise definition.)
|
||||
// We want the text lines horizontal, (vertical text indicates vertical
|
||||
// textlines) which may conflict (eg vertically written CJK).
|
||||
int osd_orientation = 0;
|
||||
bool vertical_text = textord_tabfind_force_vertical_text ||
|
||||
pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
|
||||
if (!vertical_text && textord_tabfind_vertical_text &&
|
||||
PSM_ORIENTATION_ENABLED(pageseg_mode)) {
|
||||
vertical_text =
|
||||
finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio,
|
||||
to_block, &osd_blobs);
|
||||
}
|
||||
if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != nullptr && osr != nullptr) {
|
||||
GenericVector<int> osd_scripts;
|
||||
if (osd_tess != this) {
|
||||
// We are running osd as part of layout analysis, so constrain the
|
||||
// scripts to those allowed by *this.
|
||||
AddAllScriptsConverted(unicharset, osd_tess->unicharset, &osd_scripts);
|
||||
for (int s = 0; s < sub_langs_.size(); ++s) {
|
||||
AddAllScriptsConverted(sub_langs_[s]->unicharset,
|
||||
osd_tess->unicharset, &osd_scripts);
|
||||
}
|
||||
}
|
||||
os_detect_blobs(&osd_scripts, &osd_blobs, osr, osd_tess);
|
||||
if (pageseg_mode == PSM_OSD_ONLY) {
|
||||
delete finder;
|
||||
return nullptr;
|
||||
}
|
||||
osd_orientation = osr->best_result.orientation_id;
|
||||
double osd_score = osr->orientations[osd_orientation];
|
||||
double osd_margin = min_orientation_margin * 2;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (i != osd_orientation &&
|
||||
osd_score - osr->orientations[i] < osd_margin) {
|
||||
osd_margin = osd_score - osr->orientations[i];
|
||||
}
|
||||
}
|
||||
int best_script_id = osr->best_result.script_id;
|
||||
const char* best_script_str =
|
||||
osd_tess->unicharset.get_script_from_script_id(best_script_id);
|
||||
bool cjk = best_script_id == osd_tess->unicharset.han_sid() ||
|
||||
best_script_id == osd_tess->unicharset.hiragana_sid() ||
|
||||
best_script_id == osd_tess->unicharset.katakana_sid() ||
|
||||
strcmp("Japanese", best_script_str) == 0 ||
|
||||
strcmp("Korean", best_script_str) == 0 ||
|
||||
strcmp("Hangul", best_script_str) == 0;
|
||||
if (cjk) {
|
||||
finder->set_cjk_script(true);
|
||||
}
|
||||
if (osd_margin < min_orientation_margin) {
|
||||
// The margin is weak.
|
||||
if (!cjk && !vertical_text && osd_orientation == 2) {
|
||||
// upside down latin text is improbable with such a weak margin.
|
||||
tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: "
|
||||
"Don't rotate.\n", osd_margin);
|
||||
osd_orientation = 0;
|
||||
} else {
|
||||
tprintf(
|
||||
"OSD: Weak margin (%.2f) for %d blob text block, "
|
||||
"but using orientation anyway: %d\n",
|
||||
osd_margin, osd_blobs.length(), osd_orientation);
|
||||
}
|
||||
}
|
||||
}
|
||||
osd_blobs.shallow_clear();
|
||||
finder->CorrectOrientation(to_block, vertical_text, osd_orientation);
|
||||
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
}
|
||||
|
||||
return finder;
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue