initialize
This commit is contained in:
commit
d1907a5191
|
@ -0,0 +1,33 @@
|
|||
#!/bin/bash
# Out-of-source build driver: configures and builds each driver component
# in its own subdirectory under ../build, then installs the SANE backend.
set -e   # abort on the first failed command instead of building on top of errors

cd ../
mkdir -p build   # -p: do not fail when the directory already exists (re-runs)
cd build

# build_one <subdir> <cmake-source-dir>
# Configure and build one component in a clean per-component build directory.
build_one() {
    mkdir -p "$1"
    cd "$1"
    rm -rf -- *          # start from a clean build tree
    cmake "$2"
    make
    cd ..
}

build_one imgpc   ../../device/hgdriver/ImageProcess
build_one hgdev   ../../device/hgdriver/hgdev
build_one wrapper ../../device/hgdriver/huagaoxxx_warraper_ex   # NOTE: "warraper" spelling matches the on-disk directory
build_one hgsane  ../../device/hgsane

# Install the freshly built SANE backend (requires sudo).
# Paths are relative to build/hgsane, as in the original script.
cd hgsane
sudo cp ../../release/Linux/x86_64/libsane-hgsane.so \
        /usr/lib/x86_64-linux-gnu/sane/libsane-hgsane.so.1
cd ..
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,462 @@
|
|||
//______________________________________________________________________________
|
||||
//
|
||||
// Copyright (c) Cypress Semiconductor, 2003
|
||||
// All rights reserved.
|
||||
//
|
||||
//______________________________________________________________________________
|
||||
|
||||
#ifndef CyUSBH
|
||||
#define CyUSBH
|
||||
|
||||
#ifndef __USB200_H__
#define __USB200_H__
#include <Windows.h>

#pragma pack(push,1) // USB descriptors are wire-format structures: force byte packing

// Standard USB device descriptor.
typedef struct _USB_DEVICE_DESCRIPTOR {
    UCHAR bLength;            // size of this descriptor in bytes
    UCHAR bDescriptorType;    // descriptor type
    USHORT bcdUSB;            // USB spec release number (BCD)
    UCHAR bDeviceClass;       // device class code
    UCHAR bDeviceSubClass;    // device subclass code
    UCHAR bDeviceProtocol;    // device protocol code
    UCHAR bMaxPacketSize0;    // max packet size for endpoint zero
    USHORT idVendor;          // vendor ID
    USHORT idProduct;         // product ID
    USHORT bcdDevice;         // device release number (BCD)
    UCHAR iManufacturer;      // index of manufacturer string descriptor
    UCHAR iProduct;           // index of product string descriptor
    UCHAR iSerialNumber;      // index of serial-number string descriptor
    UCHAR bNumConfigurations; // number of possible configurations
} USB_DEVICE_DESCRIPTOR, *PUSB_DEVICE_DESCRIPTOR;

// Standard USB endpoint descriptor.
typedef struct _USB_ENDPOINT_DESCRIPTOR {
    UCHAR bLength;           // size of this descriptor in bytes
    UCHAR bDescriptorType;   // descriptor type
    UCHAR bEndpointAddress;  // endpoint address (direction bit + number)
    UCHAR bmAttributes;      // endpoint attributes (transfer type)
    USHORT wMaxPacketSize;   // max packet size
    UCHAR bInterval;         // polling interval
} USB_ENDPOINT_DESCRIPTOR, *PUSB_ENDPOINT_DESCRIPTOR;

// Standard USB configuration descriptor.
typedef struct _USB_CONFIGURATION_DESCRIPTOR {
    UCHAR bLength;
    UCHAR bDescriptorType;
    USHORT wTotalLength;
    UCHAR bNumInterfaces;
    UCHAR bConfigurationValue;
    UCHAR iConfiguration;
    UCHAR bmAttributes;
    UCHAR MaxPower;
} USB_CONFIGURATION_DESCRIPTOR, *PUSB_CONFIGURATION_DESCRIPTOR;

// Standard USB interface descriptor.
typedef struct _USB_INTERFACE_DESCRIPTOR {
    UCHAR bLength;
    UCHAR bDescriptorType;
    UCHAR bInterfaceNumber;
    UCHAR bAlternateSetting;
    UCHAR bNumEndpoints;
    UCHAR bInterfaceClass;
    UCHAR bInterfaceSubClass;
    UCHAR bInterfaceProtocol;
    UCHAR iInterface;
} USB_INTERFACE_DESCRIPTOR, *PUSB_INTERFACE_DESCRIPTOR;

// USB string descriptor (variable-length UTF-16 payload).
typedef struct _USB_STRING_DESCRIPTOR {
    UCHAR bLength;
    UCHAR bDescriptorType;
    WCHAR bString[1]; // first character of the string; actual length is bLength
} USB_STRING_DESCRIPTOR, *PUSB_STRING_DESCRIPTOR;

// Common header shared by every USB descriptor type.
typedef struct _USB_COMMON_DESCRIPTOR {
    UCHAR bLength;
    UCHAR bDescriptorType;
} USB_COMMON_DESCRIPTOR, *PUSB_COMMON_DESCRIPTOR;
#pragma pack(pop)
#endif
|
||||
//______________________________________________________________________________
|
||||
|
||||
// Per-packet result record for isochronous transfers.
class CCyIsoPktInfo {
public:
    LONG Status; // completion status of this packet
    LONG Length; // byte length of this packet
};
|
||||
|
||||
//______________________________________________________________________________
|
||||
|
||||
|
||||
// {AE18AA60-7F6A-11d4-97DD-00010229B959}
// Interface GUID of the Cypress CyUSB driver, used when enumerating devices.
static GUID CYUSBDRV_GUID = {0xae18aa60, 0x7f6a, 0x11d4, 0x97, 0xdd, 0x0, 0x1, 0x2, 0x29, 0xb9, 0x59};

// Recipient of a control transfer: device / interface / endpoint / other.
typedef enum {TGT_DEVICE, TGT_INTFC, TGT_ENDPT, TGT_OTHER } CTL_XFER_TGT_TYPE;
// Category of a control request: standard / class / vendor.
typedef enum {REQ_STD, REQ_CLASS, REQ_VENDOR } CTL_XFER_REQ_TYPE;
// Direction of a control transfer.
typedef enum {DIR_TO_DEVICE, DIR_FROM_DEVICE } CTL_XFER_DIR_TYPE;
// Data transfer mode: buffered copy vs. direct.
typedef enum {XMODE_BUFFERED, XMODE_DIRECT } XFER_MODE_TYPE;

const int MAX_ENDPTS = 16;         // max endpoint slots per interface (incl. the control endpoint, per CCyUSBInterface::EndPoints)
const int MAX_INTERFACES = 8;      // max interface slots per configuration
const int USB_STRING_MAXLEN = 256; // buffer size used for descriptor strings and device names
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The CCyEndPoint ABSTRACT Class
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Abstract base class for one USB endpoint. Holds the device handle and the
// endpoint-descriptor fields, and declares both a synchronous transfer entry
// point (XferData) and an overlapped/asynchronous pair
// (BeginDataXfer / FinishDataXfer). Concrete endpoint classes implement
// BeginDataXfer.
class CCyUSBEndPoint
{
protected:
    // Wait for an overlapped I/O operation to complete.
    bool WaitForIO(OVERLAPPED *ovLapStatus);

    virtual PUCHAR BeginDirectXfer(PUCHAR buf, LONG bufLen, OVERLAPPED *ov);   // direct transfer mode
    virtual PUCHAR BeginBufferedXfer(PUCHAR buf, LONG bufLen, OVERLAPPED *ov); // buffered transfer mode

public:
    CCyUSBEndPoint(void);
    CCyUSBEndPoint(CCyUSBEndPoint& ept);
    CCyUSBEndPoint(HANDLE h, PUSB_ENDPOINT_DESCRIPTOR pEndPtDescriptor);

    HANDLE hDevice; // handle of the owning device

    // The fields of an EndPoint Descriptor (mirrored from
    // USB_ENDPOINT_DESCRIPTOR at construction -- see the HANDLE/descriptor ctor).
    UCHAR DscLen;
    UCHAR DscType;
    UCHAR Address;
    UCHAR Attributes;
    USHORT MaxPktSize;
    USHORT PktsPerFrame;
    UCHAR Interval;

    // Other fields
    ULONG TimeOut;    // transfer timeout used by the wait helpers
    ULONG UsbdStatus; // last USBD status code reported by the driver
    ULONG NtStatus;   // last NT status code reported by the driver

    DWORD bytesWritten;
    DWORD LastError;  // last Win32 error code
    bool bIn;         // presumably true for IN (device-to-host) endpoints -- TODO confirm in CyAPI docs

    XFER_MODE_TYPE XferMode; // selects buffered vs. direct transfer path

    // Synchronous transfer; len carries the buffer size in and, per the
    // reference-parameter convention, the transferred count out.
    bool XferData(PUCHAR buf, LONG &len, CCyIsoPktInfo* pktInfos = NULL);
    bool XferData(PUCHAR buf, LONG &bufLen, CCyIsoPktInfo* pktInfos, bool pktMode);
    // Asynchronous transfer: Begin starts the overlapped operation,
    // WaitForXfer waits on it, FinishDataXfer collects the result.
    virtual PUCHAR BeginDataXfer(PUCHAR buf, LONG len, OVERLAPPED *ov) = 0;
    virtual bool FinishDataXfer(PUCHAR buf, LONG &len, OVERLAPPED *ov, PUCHAR pXmitBuf, CCyIsoPktInfo* pktInfos = NULL);
    bool WaitForXfer(OVERLAPPED *ov, ULONG tOut);
    ULONG GetXferSize(void);
    void SetXferSize(ULONG xfer);

    bool Reset(void);
    bool Abort(void);

private:
};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The Control Endpoint Class
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// The control endpoint (endpoint zero). Adds the setup-packet parameters
// (target, request type, direction, request code, wValue, wIndex) and
// direction-specific Read/Write helpers.
class CCyControlEndPoint : public CCyUSBEndPoint
{
private:

public:
    CCyControlEndPoint(void);
    CCyControlEndPoint(CCyControlEndPoint& ept);
    CCyControlEndPoint(HANDLE h, PUSB_ENDPOINT_DESCRIPTOR pEndPtDescriptor);

    CTL_XFER_TGT_TYPE Target;    // recipient: device / interface / endpoint / other
    CTL_XFER_REQ_TYPE ReqType;   // standard / class / vendor request
    CTL_XFER_DIR_TYPE Direction; // to-device or from-device

    UCHAR ReqCode; // request code of the setup packet
    WORD Value;    // wValue field
    WORD Index;    // wIndex field

    bool Read(PUCHAR buf, LONG &len);  // control IN transfer
    bool Write(PUCHAR buf, LONG &len); // control OUT transfer
    PUCHAR BeginDataXfer(PUCHAR buf, LONG len, OVERLAPPED *ov);
};
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The Isoc Endpoint Class
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Isochronous endpoint. Transfer results are reported per packet through
// CCyIsoPktInfo records sized by CreatePktInfos().
class CCyIsocEndPoint : public CCyUSBEndPoint
{
protected:
    virtual PUCHAR BeginDirectXfer(PUCHAR buf, LONG bufLen, OVERLAPPED *ov);
    virtual PUCHAR BeginBufferedXfer(PUCHAR buf, LONG bufLen, OVERLAPPED *ov);

public:
    CCyIsocEndPoint(void);
    CCyIsocEndPoint(HANDLE h, PUSB_ENDPOINT_DESCRIPTOR pEndPtDescriptor);

    PUCHAR BeginDataXfer(PUCHAR buf, LONG len, OVERLAPPED *ov);
    // Returns a CCyIsoPktInfo array sized for bufLen; packets receives the
    // entry count. NOTE(review): presumably heap-allocated and freed by the
    // caller -- confirm against the CyAPI reference.
    CCyIsoPktInfo* CreatePktInfos(LONG bufLen, int &packets);
};
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The Bulk Endpoint Class
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Bulk endpoint: concrete overlapped-transfer implementation.
class CCyBulkEndPoint : public CCyUSBEndPoint
{
public:
    CCyBulkEndPoint(void);
    CCyBulkEndPoint(HANDLE h, PUSB_ENDPOINT_DESCRIPTOR pEndPtDescriptor);

    PUCHAR BeginDataXfer(PUCHAR buf, LONG len, OVERLAPPED *ov);
};
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The Interrupt Endpoint Class
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Interrupt endpoint: concrete overlapped-transfer implementation.
class CCyInterruptEndPoint : public CCyUSBEndPoint
{
public:
    CCyInterruptEndPoint(void);
    CCyInterruptEndPoint(HANDLE h, PUSB_ENDPOINT_DESCRIPTOR pEndPtDescriptor);

    PUCHAR BeginDataXfer(PUCHAR buf, LONG len, OVERLAPPED *ov);
};
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The Interface Class
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// One USB interface: its descriptor fields plus the endpoint objects that
// belong to it.
class CCyUSBInterface
{
private:
protected:
public:
    CCyUSBEndPoint *EndPoints[MAX_ENDPTS]; // Holds pointers to all the interface's endpoints, plus a pointer to the Control endpoint zero

    // Fields mirrored from USB_INTERFACE_DESCRIPTOR (see ctor).
    UCHAR bLength;
    UCHAR bDescriptorType;
    UCHAR bInterfaceNumber;
    UCHAR bAlternateSetting;
    UCHAR bNumEndpoints; // Not counting the control endpoint
    UCHAR bInterfaceClass;
    UCHAR bInterfaceSubClass;
    UCHAR bInterfaceProtocol;
    UCHAR iInterface;

    UCHAR bAltSettings;  // presumably the number of alternate settings -- TODO confirm
    USHORT wTotalLength; // Needed in case Intfc has additional (non-endpt) descriptors

    CCyUSBInterface(HANDLE h, PUSB_INTERFACE_DESCRIPTOR pIntfcDescriptor);
    CCyUSBInterface(CCyUSBInterface& ifc); // Copy Constructor
    ~CCyUSBInterface(void);
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The Config Class
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// One USB configuration: its descriptor fields plus the interfaces it
// contains.
class CCyUSBConfig
{
private:

protected:
public:
    CCyUSBInterface *Interfaces[MAX_INTERFACES]; // interfaces belonging to this configuration

    // Fields mirrored from USB_CONFIGURATION_DESCRIPTOR (see ctor).
    UCHAR bLength;
    UCHAR bDescriptorType;
    USHORT wTotalLength;
    UCHAR bNumInterfaces;
    UCHAR bConfigurationValue;
    UCHAR iConfiguration;
    UCHAR bmAttributes;
    UCHAR MaxPower;

    UCHAR AltInterfaces; // presumably the total alternate-interface count -- TODO confirm

    CCyUSBConfig(void);
    CCyUSBConfig(CCyUSBConfig& cfg); // Copy Constructor
    CCyUSBConfig(HANDLE h, PUSB_CONFIGURATION_DESCRIPTOR pConfigDescr);
    ~CCyUSBConfig(void);
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The USB Device Class - This is the main class that contains members of all the
|
||||
// other classes.
|
||||
//
|
||||
// To use the library, create an instance of this Class and call it's Open method
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// The USB device class - the main entry point of the library. Aggregates the
// device/config/interface/endpoint objects and exposes open/close, reset,
// power and descriptor-query operations. Create an instance and call Open().
class CCyUSBDevice
{
    // The public members are accessible (i.e. corruptible) by the user of the library
    // Algorithms of the class don't rely on any public members. Instead, they use the
    // private members of the class for their calculations.

public:

    CCyUSBDevice(HANDLE hnd = NULL, GUID guid = CYUSBDRV_GUID, BOOL bOpen = true);
    ~CCyUSBDevice(void);

    CCyUSBEndPoint **EndPoints; // Shortcut to USBCfgs[CfgNum]->Interfaces[IntfcIndex]->Endpoints
    CCyUSBEndPoint *EndPointOf(UCHAR addr); // look up an endpoint by its address

    // Convenience pointers to the typed endpoints of the current interface.
    CCyControlEndPoint *ControlEndPt;
    CCyIsocEndPoint *IsocInEndPt;
    CCyIsocEndPoint *IsocOutEndPt;
    CCyBulkEndPoint *BulkInEndPt;
    CCyBulkEndPoint *BulkOutEndPt;
    CCyInterruptEndPoint *InterruptInEndPt;
    CCyInterruptEndPoint *InterruptOutEndPt;

    USHORT StrLangID;     // language ID used for string-descriptor requests
    ULONG UsbdStatus;     // last USBD status code
    ULONG NtStatus;       // last NT status code
    ULONG DriverVersion;  // driver version (see GetDriverVer)
    ULONG USBDIVersion;   // USBDI version (see GetUSBDIVer)
    char DeviceName[USB_STRING_MAXLEN];
    char FriendlyName[USB_STRING_MAXLEN];
    wchar_t Manufacturer[USB_STRING_MAXLEN];  // manufacturer string descriptor
    wchar_t Product[USB_STRING_MAXLEN];       // product string descriptor
    wchar_t SerialNumber[USB_STRING_MAXLEN];  // serial-number string descriptor

    CHAR DevPath[USB_STRING_MAXLEN]; // device path used to open the handle

    // Cached copies of device-descriptor fields.
    USHORT BcdUSB;
    USHORT VendorID;
    USHORT ProductID;
    UCHAR USBAddress;
    UCHAR DevClass;
    UCHAR DevSubClass;
    UCHAR DevProtocol;
    UCHAR MaxPacketSize;
    USHORT BcdDevice;

    // Cached copies of the active configuration's fields.
    UCHAR ConfigValue;
    UCHAR ConfigAttrib;
    UCHAR MaxPower;

    // Cached copies of the active interface's fields.
    UCHAR IntfcClass;
    UCHAR IntfcSubClass;
    UCHAR IntfcProtocol;
    bool bHighSpeed; // true when the device runs at high speed (see GetSpeed)

    DWORD BytesXfered; // byte count of the last transfer

    UCHAR DeviceCount(void);
    UCHAR ConfigCount(void);
    UCHAR IntfcCount(void);
    UCHAR AltIntfcCount(void);
    UCHAR EndPointCount(void);

    UCHAR Config(void) { return CfgNum; } // Normally 0
    void SetConfig(UCHAR cfg);

    UCHAR Interface(void) { return IntfcNum; } // Usually 0
    // No SetInterface method since only 1 intfc per device (per Windows)

    UCHAR AltIntfc(void);
    bool SetAltIntfc(UCHAR alt);

    GUID DriverGUID(void) { return DrvGuid; }
    HANDLE DeviceHandle(void) { return hDevice; }
    void UsbdStatusString(ULONG stat, PCHAR s); // render a USBD status code as text into s
    bool CreateHandle(UCHAR dev);
    void DestroyHandle();

    bool Open(UCHAR dev);  // open the dev-th attached device
    void Close(void);
    bool Reset(void);
    bool ReConnect(void);
    bool Suspend(void);
    bool Resume(void);
    bool IsOpen(void) { return (hDevice != INVALID_HANDLE_VALUE); }

    UCHAR PowerState(void);

    // Copy the cached descriptors into caller-supplied structures.
    void GetDeviceDescriptor(PUSB_DEVICE_DESCRIPTOR descr);
    void GetConfigDescriptor(PUSB_CONFIGURATION_DESCRIPTOR descr);
    void GetIntfcDescriptor(PUSB_INTERFACE_DESCRIPTOR descr);
    CCyUSBConfig GetUSBConfig(int index);

private:

    USB_DEVICE_DESCRIPTOR USBDeviceDescriptor;          // cached device descriptor
    PUSB_CONFIGURATION_DESCRIPTOR USBConfigDescriptors[2]; // raw configuration descriptors

    CCyUSBConfig *USBCfgs[2]; // parsed configuration objects

    HANDLE hWnd;
    HANDLE hDevice;          // driver handle of the open device
    HANDLE hDevNotification; // device-notification registration (PnP events)
    HANDLE hHndNotification;

    GUID DrvGuid; // driver interface GUID used for enumeration

    // Counts discovered during enumeration.
    UCHAR Devices;
    UCHAR Interfaces;
    UCHAR AltInterfaces;
    UCHAR Configs;

    UCHAR DevNum;     // index of the currently open device
    UCHAR CfgNum;     // the currently selected configuration
    UCHAR IntfcNum;   // The current selected interface's bInterfaceNumber
    UCHAR IntfcIndex; // The entry in the Config's interfaces table matching to IntfcNum and AltSetting

    // Internal helpers that populate the cached descriptors/strings above.
    void GetDevDescriptor(void);
    void GetCfgDescriptor(int descIndex);
    void GetString(wchar_t *s, UCHAR sIndex);
    void SetStringDescrLanguage(void);
    void SetAltIntfcParams(UCHAR alt);
    bool IoControl(ULONG cmd, PUCHAR buf, ULONG len); // raw ioctl to the driver

    void SetEndPointPtrs(void); // refresh the public typed endpoint pointers
    void GetDeviceName(void);
    void GetFriendlyName(void);
    void GetDriverVer(void);
    void GetUSBDIVer(void);
    void GetSpeed(void);
    void GetUSBAddress(void);
    //void CloseEndPtHandles(void);

    bool RegisterForPnpEvents(HANDLE h);
};
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
#endif
|
|
@ -0,0 +1,34 @@
|
|||
#pragma once
|
||||
#include <chrono>
|
||||
|
||||
// Simple monotonic stopwatch built on std::chrono::steady_clock.
// Timing starts at construction; reset() restarts it. The elapsed_*()
// accessors return the time since start in the named unit. They are now
// const (they only read _start), so a StopWatch can be queried through a
// const reference; the four duplicated chrono conversions are factored
// into one private helper.
class StopWatch
{
public:
    StopWatch() : _start(std::chrono::steady_clock::now()) {}

    // Restart timing from now.
    void reset() {
        _start = std::chrono::steady_clock::now();
    }

    // Elapsed time in seconds.
    double elapsed_s() const {
        return elapsed<std::ratio<1>>();
    }

    // Elapsed time in milliseconds.
    double elapsed_ms() const {
        return elapsed<std::milli>();
    }

    // Elapsed time in microseconds.
    double elapsed_us() const {
        return elapsed<std::micro>();
    }

    // Elapsed time in nanoseconds.
    double elapsed_ns() const {
        return elapsed<std::nano>();
    }

private:
    // Shared conversion: duration since _start expressed in Period units.
    template <typename Period>
    double elapsed() const {
        return std::chrono::duration<double, Period>(
                   std::chrono::steady_clock::now() - _start).count();
    }

    std::chrono::steady_clock::time_point _start;
};
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: apitypes.h
|
||||
// Description: Types used in both the API and internally
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Mar 03 09:22:53 PST 2010
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_API_APITYPES_H__
|
||||
#define TESSERACT_API_APITYPES_H__
|
||||
|
||||
#include "publictypes.h"
|
||||
|
||||
// The types used by the API and Page/ResultIterator can be found in:
|
||||
// ccstruct/publictypes.h
|
||||
// ccmain/resultiterator.h
|
||||
// ccmain/pageiterator.h
|
||||
// API interfaces and API users should be sure to include this file, rather
|
||||
// than the lower-level one, and lower-level code should be sure to include
|
||||
// only the lower-level file.
|
||||
|
||||
#endif // TESSERACT_API_APITYPES_H__
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,922 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: baseapi.h
|
||||
// Description: Simple API for calling tesseract.
|
||||
// Author: Ray Smith
|
||||
// Created: Fri Oct 06 15:35:01 PDT 2006
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_API_BASEAPI_H__
|
||||
#define TESSERACT_API_BASEAPI_H__
|
||||
|
||||
// Library version, both as a display string and as a packed integer
// (0xMMmmpp: one byte each for major, minor, patch — see MAKE_VERSION).
#define TESSERACT_VERSION_STR "3.05.02"
#define TESSERACT_VERSION 0x030502
// Pack a (major, minor, patch) triple into the TESSERACT_VERSION layout.
#define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \
                                            (patch))
|
||||
|
||||
#include <stdio.h>
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
// and hide includes of complex types in baseapi.cpp.
|
||||
#include "platform.h"
|
||||
#include "apitypes.h"
|
||||
#include "thresholder.h"
|
||||
#include "unichar.h"
|
||||
#include "tesscallback.h"
|
||||
#include "publictypes.h"
|
||||
#include "pageiterator.h"
|
||||
#include "resultiterator.h"
|
||||
|
||||
template <typename T> class GenericVector;
|
||||
class PAGE_RES;
|
||||
class PAGE_RES_IT;
|
||||
class ParagraphModel;
|
||||
struct BlamerBundle;
|
||||
class BLOCK_LIST;
|
||||
class DENORM;
|
||||
class MATRIX;
|
||||
class ROW;
|
||||
class STRING;
|
||||
class WERD;
|
||||
struct Pix;
|
||||
struct Box;
|
||||
struct Pixa;
|
||||
struct Boxa;
|
||||
class ETEXT_DESC;
|
||||
struct OSResults;
|
||||
class TBOX;
|
||||
class UNICHARSET;
|
||||
class WERD_CHOICE_LIST;
|
||||
|
||||
struct INT_FEATURE_STRUCT;
|
||||
typedef INT_FEATURE_STRUCT *INT_FEATURE;
|
||||
struct TBLOB;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
#ifndef NO_CUBE_BUILD
|
||||
class CubeRecoContext;
|
||||
#endif // NO_CUBE_BUILD
|
||||
class Dawg;
|
||||
class Dict;
|
||||
class EquationDetect;
|
||||
class PageIterator;
|
||||
class LTRResultIterator;
|
||||
class ResultIterator;
|
||||
class MutableIterator;
|
||||
class TessResultRenderer;
|
||||
class Tesseract;
|
||||
class Trie;
|
||||
class Wordrec;
|
||||
|
||||
// Member-function-pointer hook types used to plug external callbacks into
// Dict / Wordrec (see the setter APIs elsewhere in this header family).

// Dictionary lookup hook on Dict.
typedef int (Dict::*DictFunc)(void* void_dawg_args,
                              UNICHAR_ID unichar_id, bool word_end) const;
// Character-in-context probability hook on Dict.
typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
                                                 const char* context,
                                                 int context_bytes,
                                                 const char* character,
                                                 int character_bytes);
// Params-model classifier hook on Dict.
typedef float (Dict::*ParamsModelClassifyFunc)(
    const char *lang, void *path);
// Hook on Wordrec for filling the ratings lattice from the best choices.
typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
                                         const WERD_CHOICE_LIST &best_choices,
                                         const UNICHARSET &unicharset,
                                         BlamerBundle *blamer_bundle);
// Callback invoked with ground-truth data during training/evaluation.
typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *>
    TruthCallback;
|
||||
|
||||
/**
|
||||
* Base class for all tesseract APIs.
|
||||
* Specific classes can add ability to work on different inputs or produce
|
||||
* different outputs.
|
||||
* This class is mostly an interface layer on top of the Tesseract instance
|
||||
* class to hide the data types so that users of this class don't have to
|
||||
* include any other Tesseract headers.
|
||||
*/
|
||||
class TESS_API TessBaseAPI {
|
||||
public:
|
||||
TessBaseAPI();
|
||||
virtual ~TessBaseAPI();
|
||||
|
||||
/**
|
||||
* Returns the version identifier as a static string. Do not delete.
|
||||
*/
|
||||
static const char* Version();
|
||||
|
||||
/**
|
||||
* If compiled with OpenCL AND an available OpenCL
|
||||
* device is deemed faster than serial code, then
|
||||
* "device" is populated with the cl_device_id
|
||||
* and returns sizeof(cl_device_id)
|
||||
* otherwise *device=NULL and returns 0.
|
||||
*/
|
||||
static size_t getOpenCLDevice(void **device);
|
||||
|
||||
/**
|
||||
* Writes the thresholded image to stderr as a PBM file on receipt of a
|
||||
* SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).
|
||||
*/
|
||||
static void CatchSignals();
|
||||
|
||||
/**
|
||||
* Set the name of the input file. Needed for training and
|
||||
* reading a UNLV zone file, and for searchable PDF output.
|
||||
*/
|
||||
void SetInputName(const char* name);
|
||||
/**
|
||||
* These functions are required for searchable PDF output.
|
||||
* We need our hands on the input file so that we can include
|
||||
* it in the PDF without transcoding. If that is not possible,
|
||||
* we need the original image. Finally, resolution metadata
|
||||
* is stored in the PDF so we need that as well.
|
||||
*/
|
||||
const char* GetInputName();
|
||||
// Takes ownership of the input pix.
|
||||
void SetInputImage(Pix *pix);
|
||||
Pix* GetInputImage();
|
||||
int GetSourceYResolution();
|
||||
const char* GetDatapath();
|
||||
|
||||
/** Set the name of the bonus output files. Needed only for debugging. */
|
||||
void SetOutputName(const char* name);
|
||||
|
||||
/**
|
||||
* Set the value of an internal "parameter."
|
||||
* Supply the name of the parameter and the value as a string, just as
|
||||
* you would in a config file.
|
||||
* Returns false if the name lookup failed.
|
||||
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
|
||||
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
|
||||
* SetVariable may be used before Init, but settings will revert to
|
||||
* defaults on End().
|
||||
*
|
||||
* Note: Must be called after Init(). Only works for non-init variables
|
||||
* (init variables should be passed to Init()).
|
||||
*/
|
||||
bool SetVariable(const char* name, const char* value);
|
||||
bool SetDebugVariable(const char* name, const char* value);
|
||||
|
||||
/**
|
||||
* Returns true if the parameter was found among Tesseract parameters.
|
||||
* Fills in value with the value of the parameter.
|
||||
*/
|
||||
bool GetIntVariable(const char *name, int *value) const;
|
||||
bool GetBoolVariable(const char *name, bool *value) const;
|
||||
bool GetDoubleVariable(const char *name, double *value) const;
|
||||
|
||||
/**
|
||||
* Returns the pointer to the string that represents the value of the
|
||||
* parameter if it was found among Tesseract parameters.
|
||||
*/
|
||||
const char *GetStringVariable(const char *name) const;
|
||||
|
||||
/**
|
||||
* Print Tesseract parameters to the given file.
|
||||
*/
|
||||
void PrintVariables(FILE *fp) const;
|
||||
|
||||
/**
|
||||
* Get value of named variable as a string, if it exists.
|
||||
*/
|
||||
bool GetVariableAsString(const char *name, STRING *val);
|
||||
|
||||
/**
|
||||
* Instances are now mostly thread-safe and totally independent,
|
||||
* but some global parameters remain. Basically it is safe to use multiple
|
||||
* TessBaseAPIs in different threads in parallel, UNLESS:
|
||||
* you use SetVariable on some of the Params in classify and textord.
|
||||
* If you do, then the effect will be to change it for all your instances.
|
||||
*
|
||||
* Start tesseract. Returns zero on success and -1 on failure.
|
||||
* NOTE that the only members that may be called before Init are those
|
||||
* listed above here in the class definition.
|
||||
*
|
||||
* The datapath must be the name of the parent directory of tessdata and
|
||||
* must end in / . Any name after the last / will be stripped.
|
||||
* The language is (usually) an ISO 639-3 string or NULL will default to eng.
|
||||
* It is entirely safe (and eventually will be efficient too) to call
|
||||
* Init multiple times on the same instance to change language, or just
|
||||
* to reset the classifier.
|
||||
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
|
||||
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
|
||||
* English. Languages may specify internally that they want to be loaded
|
||||
* with one or more other languages, so the ~ sign is available to override
|
||||
* that. Eg if hin were set to load eng by default, then hin+~eng would force
|
||||
* loading only hin. The number of loaded languages is limited only by
|
||||
* memory, with the caveat that loading additional languages will impact
|
||||
* both speed and accuracy, as there is more work to do to decide on the
|
||||
* applicable language, and there is more chance of hallucinating incorrect
|
||||
* words.
|
||||
* WARNING: On changing languages, all Tesseract parameters are reset
|
||||
* back to their default values. (Which may vary between languages.)
|
||||
* If you have a rare need to set a Variable that controls
|
||||
* initialization for a second call to Init you should explicitly
|
||||
* call End() and then use SetVariable before Init. This is only a very
|
||||
* rare use case, since there are very few uses that require any parameters
|
||||
* to be set before Init.
|
||||
*
|
||||
* If set_only_non_debug_params is true, only params that do not contain
|
||||
* "debug" in the name will be set.
|
||||
*/
|
||||
int Init(const char* datapath, const char* language, OcrEngineMode mode,
|
||||
char **configs, int configs_size,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values,
|
||||
bool set_only_non_debug_params);
|
||||
int Init(const char* datapath, const char* language, OcrEngineMode oem) {
|
||||
return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
|
||||
}
|
||||
int Init(const char* datapath, const char* language) {
|
||||
return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the languages string used in the last valid initialization.
|
||||
* If the last initialization specified "deu+hin" then that will be
|
||||
* returned. If hin loaded eng automatically as well, then that will
|
||||
* not be included in this list. To find the languages actually
|
||||
* loaded use GetLoadedLanguagesAsVector.
|
||||
* The returned string should NOT be deleted.
|
||||
*/
|
||||
const char* GetInitLanguagesAsString() const;
|
||||
|
||||
/**
|
||||
* Returns the loaded languages in the vector of STRINGs.
|
||||
* Includes all languages loaded by the last Init, including those loaded
|
||||
* as dependencies of other loaded languages.
|
||||
*/
|
||||
void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
|
||||
|
||||
/**
|
||||
* Returns the available languages in the vector of STRINGs.
|
||||
*/
|
||||
void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
|
||||
|
||||
/**
|
||||
* Init only the lang model component of Tesseract. The only functions
|
||||
* that work after this init are SetVariable and IsValidWord.
|
||||
* WARNING: temporary! This function will be removed from here and placed
|
||||
* in a separate API at some future time.
|
||||
*/
|
||||
int InitLangMod(const char* datapath, const char* language);
|
||||
|
||||
/**
|
||||
* Init only for page layout analysis. Use only for calls to SetImage and
|
||||
* AnalysePage. Calls that attempt recognition will generate an error.
|
||||
*/
|
||||
void InitForAnalysePage();
|
||||
|
||||
/**
|
||||
* Read a "config" file containing a set of param, value pairs.
|
||||
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
|
||||
* and also accepts a relative or absolute path name.
|
||||
* Note: only non-init params will be set (init params are set by Init()).
|
||||
*/
|
||||
void ReadConfigFile(const char* filename);
|
||||
/** Same as above, but only set debug params from the given config file. */
|
||||
void ReadDebugConfigFile(const char* filename);
|
||||
|
||||
/**
|
||||
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
|
||||
* The mode is stored as an IntParam so it can also be modified by
|
||||
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
|
||||
*/
|
||||
void SetPageSegMode(PageSegMode mode);
|
||||
|
||||
/** Return the current page segmentation mode. */
|
||||
PageSegMode GetPageSegMode() const;
|
||||
|
||||
/**
|
||||
* Recognize a rectangle from an image and return the result as a string.
|
||||
* May be called many times for a single Init.
|
||||
* Currently has no error checking.
|
||||
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
|
||||
* Palette color images will not work properly and must be converted to
|
||||
* 24 bit.
|
||||
* Binary images of 1 bit per pixel may also be given but they must be
|
||||
* byte packed with the MSB of the first byte being the first pixel, and a
|
||||
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*
|
||||
* Note that TesseractRect is the simplified convenience interface.
|
||||
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
|
||||
* and one or more of the Get*Text functions below.
|
||||
*/
|
||||
char* TesseractRect(const unsigned char* imagedata,
|
||||
int bytes_per_pixel, int bytes_per_line,
|
||||
int left, int top, int width, int height);
|
||||
|
||||
/**
|
||||
* Call between pages or documents etc to free up memory and forget
|
||||
* adaptive data.
|
||||
*/
|
||||
void ClearAdaptiveClassifier();
|
||||
|
||||
/**
|
||||
* @defgroup AdvancedAPI Advanced API
|
||||
* The following methods break TesseractRect into pieces, so you can
|
||||
* get hold of the thresholded image, get the text in different formats,
|
||||
* get bounding boxes, confidences etc.
|
||||
*/
|
||||
/* @{ */
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. Format is as
|
||||
* TesseractRect above. Copies the image buffer and converts to Pix.
|
||||
* SetImage clears all recognition results, and sets the rectangle to the
|
||||
* full image, so it may be followed immediately by a GetUTF8Text, and it
|
||||
* will automatically perform recognition.
|
||||
*/
|
||||
void SetImage(const unsigned char* imagedata, int width, int height,
|
||||
int bytes_per_pixel, int bytes_per_line);
|
||||
|
||||
/**
|
||||
* Provide an image for Tesseract to recognize. As with SetImage above,
|
||||
* Tesseract takes its own copy of the image, so it need not persist until
|
||||
* after Recognize.
|
||||
* Pix vs raw, which to use?
|
||||
* Use Pix where possible. Tesseract uses Pix as its internal representation
|
||||
* and it is therefore more efficient to provide a Pix directly.
|
||||
*/
|
||||
void SetImage(Pix* pix);
|
||||
|
||||
/**
|
||||
* Set the resolution of the source image in pixels per inch so font size
|
||||
* information can be calculated in results. Call this after SetImage().
|
||||
*/
|
||||
void SetSourceResolution(int ppi);
|
||||
|
||||
/**
|
||||
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
|
||||
* Each SetRectangle clears the recognition results so multiple rectangles
|
||||
* can be recognized with the same image.
|
||||
*/
|
||||
void SetRectangle(int left, int top, int width, int height);
|
||||
|
||||
/**
|
||||
* In extreme cases only, usually with a subclass of Thresholder, it
|
||||
* is possible to provide a different Thresholder. The Thresholder may
|
||||
* be preloaded with an image, settings etc, or they may be set after.
|
||||
* Note that Tesseract takes ownership of the Thresholder and will
|
||||
* delete it when it it is replaced or the API is destructed.
|
||||
*/
|
||||
void SetThresholder(ImageThresholder* thresholder) {
|
||||
delete thresholder_;
|
||||
thresholder_ = thresholder;
|
||||
ClearResults();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a copy of the internal thresholded image from Tesseract.
|
||||
* Caller takes ownership of the Pix and must pixDestroy it.
|
||||
* May be called any time after SetImage, or after TesseractRect.
|
||||
*/
|
||||
Pix* GetThresholdedImage();
|
||||
|
||||
/**
|
||||
* Get the result of page layout analysis as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa* GetRegions(Pixa** pixa);
|
||||
|
||||
/**
|
||||
* Get the textlines as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If raw_image is true, then extract from the original image instead of the
|
||||
* thresholded image and pad by raw_padding pixels.
|
||||
* If blockids is not NULL, the block-id of each line is also returned as an
|
||||
* array of one element per line. delete [] after use.
|
||||
* If paraids is not NULL, the paragraph-id of each line within its block is
|
||||
* also returned as an array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa* GetTextlines(const bool raw_image, const int raw_padding,
|
||||
Pixa** pixa, int** blockids, int** paraids);
|
||||
/*
|
||||
Helper method to extract from the thresholded image. (most common usage)
|
||||
*/
|
||||
Boxa* GetTextlines(Pixa** pixa, int** blockids) {
|
||||
return GetTextlines(false, 0, pixa, blockids, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
|
||||
* pair, in reading order. Enables downstream handling of non-rectangular
|
||||
* regions.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not NULL, the block-id of each line is also returned as an
|
||||
* array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa* GetStrips(Pixa** pixa, int** blockids);
|
||||
|
||||
/**
|
||||
* Get the words as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
*/
|
||||
Boxa* GetWords(Pixa** pixa);
|
||||
|
||||
/**
|
||||
* Gets the individual connected (text) components (created
|
||||
* after pages segmentation step, but before recognition)
|
||||
* as a leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* Note: the caller is responsible for calling boxaDestroy()
|
||||
* on the returned Boxa array and pixaDestroy() on cc array.
|
||||
*/
|
||||
Boxa* GetConnectedComponents(Pixa** cc);
|
||||
|
||||
/**
|
||||
* Get the given level kind of components (block, textline, word etc.) as a
|
||||
* leptonica-style Boxa, Pixa pair, in reading order.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not NULL, the block-id of each component is also returned
|
||||
* as an array of one element per component. delete [] after use.
|
||||
* If blockids is not NULL, the paragraph-id of each component with its block
|
||||
* is also returned as an array of one element per component. delete [] after
|
||||
* use.
|
||||
* If raw_image is true, then portions of the original image are extracted
|
||||
* instead of the thresholded image and padded with raw_padding.
|
||||
* If text_only is true, then only text components are returned.
|
||||
*/
|
||||
Boxa* GetComponentImages(const PageIteratorLevel level,
|
||||
const bool text_only, const bool raw_image,
|
||||
const int raw_padding,
|
||||
Pixa** pixa, int** blockids, int** paraids);
|
||||
// Helper function to get binary images with no padding (most common usage).
|
||||
Boxa* GetComponentImages(const PageIteratorLevel level,
|
||||
const bool text_only,
|
||||
Pixa** pixa, int** blockids) {
|
||||
return GetComponentImages(level, text_only, false, 0, pixa, blockids, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the scale factor of the thresholded image that would be returned by
|
||||
* GetThresholdedImage() and the various GetX() methods that call
|
||||
* GetComponentImages().
|
||||
* Returns 0 if no thresholder has been set.
|
||||
*/
|
||||
int GetThresholdedImageScaleFactor() const;
|
||||
|
||||
/**
|
||||
* Dump the internal binary image to a PGM file.
|
||||
* @deprecated Use GetThresholdedImage and write the image using pixWrite
|
||||
* instead if possible.
|
||||
*/
|
||||
void DumpPGM(const char* filename);
|
||||
|
||||
/**
|
||||
* Runs page layout analysis in the mode set by SetPageSegMode.
|
||||
* May optionally be called prior to Recognize to get access to just
|
||||
* the page layout results. Returns an iterator to the results.
|
||||
* If merge_similar_words is true, words are combined where suitable for use
|
||||
* with a line recognizer. Use if you want to use AnalyseLayout to find the
|
||||
* textlines, and then want to process textline fragments with an external
|
||||
* line recognizer.
|
||||
* Returns NULL on error or an empty page.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
PageIterator* AnalyseLayout();
|
||||
PageIterator* AnalyseLayout(bool merge_similar_words);
|
||||
int AnalyseLayout1();
|
||||
|
||||
/**
|
||||
* Recognize the image from SetAndThresholdImage, generating Tesseract
|
||||
* internal structures. Returns 0 on success.
|
||||
* Optional. The Get*Text functions below will call Recognize if needed.
|
||||
* After Recognize, the output is kept internally until the next SetImage.
|
||||
*/
|
||||
int Recognize(ETEXT_DESC* monitor);
|
||||
|
||||
/**
|
||||
* Methods to retrieve information after SetAndThresholdImage(),
|
||||
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
|
||||
*/
|
||||
|
||||
/** Variant on Recognize used for testing chopper. */
|
||||
int RecognizeForChopTest(ETEXT_DESC* monitor);
|
||||
|
||||
/**
|
||||
* Turns images into symbolic text.
|
||||
*
|
||||
* filename can point to a single image, a multi-page TIFF,
|
||||
* or a plain text list of image filenames.
|
||||
*
|
||||
* retry_config is useful for debugging. If not NULL, you can fall
|
||||
* back to an alternate configuration if a page fails for some
|
||||
* reason.
|
||||
*
|
||||
* timeout_millisec terminates processing if any single page
|
||||
* takes too long. Set to 0 for unlimited time.
|
||||
*
|
||||
* renderer is responsible for creating the output. For example,
|
||||
* use the TessTextRenderer if you want plaintext output, or
|
||||
* the TessPDFRender to produce searchable PDF.
|
||||
*
|
||||
* If tessedit_page_number is non-negative, will only process that
|
||||
* single page. Works for multi-page tiff file, or filelist.
|
||||
*
|
||||
* Returns true if successful, false on error.
|
||||
*/
|
||||
bool ProcessPages(const char* filename, const char* retry_config,
|
||||
int timeout_millisec, TessResultRenderer* renderer);
|
||||
// Does the real work of ProcessPages.
|
||||
bool ProcessPagesInternal(const char* filename, const char* retry_config,
|
||||
int timeout_millisec, TessResultRenderer* renderer);
|
||||
|
||||
/**
|
||||
* Turn a single image into symbolic text.
|
||||
*
|
||||
* The pix is the image processed. filename and page_index are
|
||||
* metadata used by side-effect processes, such as reading a box
|
||||
* file or formatting as hOCR.
|
||||
*
|
||||
* See ProcessPages for descriptions of other parameters.
|
||||
*/
|
||||
bool ProcessPage(Pix* pix, int page_index, const char* filename,
|
||||
const char* retry_config, int timeout_millisec,
|
||||
TessResultRenderer* renderer,
|
||||
const char* jpgdata, int len);
|
||||
|
||||
/**
|
||||
* Get a reading-order iterator to the results of LayoutAnalysis and/or
|
||||
* Recognize. The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
ResultIterator* GetIterator();
|
||||
|
||||
/**
|
||||
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
|
||||
* The returned iterator must be deleted after use.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
MutableIterator* GetMutableIterator();
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*/
|
||||
char* GetUTF8Text();
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
* monitor can be used to
|
||||
* cancel the recognition
|
||||
* receive progress callbacks
|
||||
*/
|
||||
char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
*/
|
||||
char* GetHOCRText(int page_number);
|
||||
|
||||
/**
|
||||
* Make a TSV-formatted string from the internal data structures.
|
||||
* page_number is 0-based but will appear in the output as 1-based.
|
||||
*/
|
||||
char* GetTSVText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded in the same
|
||||
* format as a box file used in training. Returned string must be freed with
|
||||
* the delete [] operator.
|
||||
* Constructs coordinates in the original image - not just the rectangle.
|
||||
* page_number is a 0-based page index that will appear in the box file.
|
||||
*/
|
||||
char* GetBoxText(int page_number);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UNLV format Latin-1 with specific reject and suspect codes
|
||||
* and must be freed with the delete [] operator.
|
||||
*/
|
||||
char* GetUNLVText();
|
||||
|
||||
/**
|
||||
* Detect the orientation of the input image and apparent script (alphabet).
|
||||
* orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270)
|
||||
* orient_conf is the confidence (15.0 is reasonably confident)
|
||||
* script_name is an ASCII string, the name of the script, e.g. "Latin"
|
||||
* script_conf is confidence level in the script
|
||||
* Returns true on success and writes values to each parameter as an output
|
||||
*/
|
||||
bool DetectOrientationScript(int* orient_deg, float* orient_conf, const char** script_name, float* script_conf);
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
* page_number is a 0-based page index that will appear in the osd file.
|
||||
*/
|
||||
char* GetOsdText(int page_number);
|
||||
|
||||
/** Returns the (average) confidence value between 0 and 100. */
|
||||
int MeanTextConf();
|
||||
/**
|
||||
* Returns all word confidences (between 0 and 100) in an array, terminated
|
||||
* by -1. The calling function must delete [] after use.
|
||||
* The number of confidences should correspond to the number of space-
|
||||
* delimited words in GetUTF8Text.
|
||||
*/
|
||||
int* AllWordConfidences();
|
||||
|
||||
/**
|
||||
* Applies the given word to the adaptive classifier if possible.
|
||||
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
|
||||
* tell the boundaries of the graphemes.
|
||||
* Assumes that SetImage/SetRectangle have been used to set the image
|
||||
* to the given word. The mode arg should be PSM_SINGLE_WORD or
|
||||
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
|
||||
* The currently set PageSegMode is preserved.
|
||||
* Returns false if adaption was not possible for some reason.
|
||||
*/
|
||||
bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
|
||||
|
||||
/**
|
||||
* Free up recognition results and any stored image data, without actually
|
||||
* freeing any recognition data that would be time-consuming to reload.
|
||||
* Afterwards, you must call SetImage or TesseractRect before doing
|
||||
* any Recognize or Get* operation.
|
||||
*/
|
||||
void Clear();
|
||||
|
||||
/**
|
||||
* Close down tesseract and free up all memory. End() is equivalent to
|
||||
* destructing and reconstructing your TessBaseAPI.
|
||||
* Once End() has been used, none of the other API functions may be used
|
||||
* other than Init and anything declared above it in the class definition.
|
||||
*/
|
||||
void End();
|
||||
|
||||
/**
|
||||
* Clear any library-level memory caches.
|
||||
* There are a variety of expensive-to-load constant data structures (mostly
|
||||
* language dictionaries) that are cached globally -- surviving the Init()
|
||||
* and End() of individual TessBaseAPI's. This function allows the clearing
|
||||
* of these caches.
|
||||
**/
|
||||
static void ClearPersistentCache();
|
||||
|
||||
/**
|
||||
* Check whether a word is valid according to Tesseract's language model
|
||||
* @return 0 if the word is invalid, non-zero if valid.
|
||||
* @warning temporary! This function will be removed from here and placed
|
||||
* in a separate API at some future time.
|
||||
*/
|
||||
int IsValidWord(const char *word);
|
||||
// Returns true if utf8_character is defined in the UniCharset.
|
||||
bool IsValidCharacter(const char *utf8_character);
|
||||
|
||||
|
||||
bool GetTextDirection(int* out_offset, float* out_slope);
|
||||
|
||||
/** Sets Dict::letter_is_okay_ function to point to the given function. */
|
||||
void SetDictFunc(DictFunc f);
|
||||
|
||||
/** Sets Dict::probability_in_context_ function to point to the given
|
||||
* function.
|
||||
*/
|
||||
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
|
||||
|
||||
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
|
||||
void SetFillLatticeFunc(FillLatticeFunc f);
|
||||
|
||||
/**
|
||||
* Estimates the Orientation And Script of the image.
|
||||
* @return true if the image was processed successfully.
|
||||
*/
|
||||
bool DetectOS(OSResults*);
|
||||
|
||||
/** This method returns the features associated with the input image. */
|
||||
void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
|
||||
int* num_features, int* feature_outline_index);
|
||||
|
||||
/**
|
||||
* This method returns the row to which a box of specified dimensions would
|
||||
* belong. If no good match is found, it returns NULL.
|
||||
*/
|
||||
static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
|
||||
int right, int bottom);
|
||||
|
||||
/**
|
||||
* Method to run adaptive classifier on a blob.
|
||||
* It returns at max num_max_matches results.
|
||||
*/
|
||||
void RunAdaptiveClassifier(TBLOB* blob,
|
||||
int num_max_matches,
|
||||
int* unichar_ids,
|
||||
float* ratings,
|
||||
int* num_matches_returned);
|
||||
|
||||
/** This method returns the string form of the specified unichar. */
|
||||
const char* GetUnichar(int unichar_id);
|
||||
|
||||
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
|
||||
const Dawg *GetDawg(int i) const;
|
||||
|
||||
/** Return the number of dawgs loaded into tesseract_ object. */
|
||||
int NumDawgs() const;
|
||||
|
||||
/** Returns a ROW object created from the input row specification. */
|
||||
static ROW *MakeTessOCRRow(float baseline, float xheight,
|
||||
float descender, float ascender);
|
||||
|
||||
/** Returns a TBLOB corresponding to the entire input image. */
|
||||
static TBLOB *MakeTBLOB(Pix *pix);
|
||||
|
||||
/**
|
||||
* This method baseline normalizes a TBLOB in-place. The input row is used
|
||||
* for normalization. The denorm is an optional parameter in which the
|
||||
* normalization-antidote is returned.
|
||||
*/
|
||||
static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
|
||||
|
||||
Tesseract* tesseract() const { return tesseract_; }
|
||||
|
||||
OcrEngineMode oem() const { return last_oem_requested_; }
|
||||
|
||||
void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
|
||||
|
||||
#ifndef NO_CUBE_BUILD
|
||||
/** Return a pointer to underlying CubeRecoContext object if present. */
|
||||
CubeRecoContext *GetCubeRecoContext() const;
|
||||
#endif // NO_CUBE_BUILD
|
||||
|
||||
void set_min_orientation_margin(double margin);
|
||||
|
||||
/**
|
||||
* Return text orientation of each block as determined by an earlier run
|
||||
* of layout analysis.
|
||||
*/
|
||||
void GetBlockTextOrientations(int** block_orientation,
|
||||
bool** vertical_writing);
|
||||
|
||||
/** Find lines from the image making the BLOCK_LIST. */
|
||||
BLOCK_LIST* FindLinesCreateBlockList();
|
||||
|
||||
/**
|
||||
* Delete a block list.
|
||||
* This is to keep BLOCK_LIST pointer opaque
|
||||
* and let go of including the other headers.
|
||||
*/
|
||||
static void DeleteBlockList(BLOCK_LIST* block_list);
|
||||
/* @} */
|
||||
|
||||
protected:
|
||||
|
||||
/** Common code for setting the image. Returns true if Init has been called. */
|
||||
TESS_LOCAL bool InternalSetImage();
|
||||
|
||||
/**
|
||||
* Run the thresholder to make the thresholded image. If pix is not NULL,
|
||||
* the source is thresholded to pix instead of the internal IMAGE.
|
||||
*/
|
||||
TESS_LOCAL virtual void Threshold(Pix** pix);
|
||||
|
||||
/**
|
||||
* Find lines from the image making the BLOCK_LIST.
|
||||
* @return 0 on success.
|
||||
*/
|
||||
TESS_LOCAL int FindLines();
|
||||
|
||||
/** Delete the pageres and block list ready for a new page. */
|
||||
void ClearResults();
|
||||
|
||||
/**
|
||||
* Return an LTR Result Iterator -- used only for training, as we really want
|
||||
* to ignore all BiDi smarts at that point.
|
||||
* delete once you're done with it.
|
||||
*/
|
||||
TESS_LOCAL LTRResultIterator* GetLTRIterator();
|
||||
|
||||
/**
|
||||
* Return the length of the output text string, as UTF8, assuming
|
||||
* one newline per line and one per block, with a terminator,
|
||||
* and assuming a single character reject marker for each rejected character.
|
||||
* Also return the number of recognized blobs in blob_count.
|
||||
*/
|
||||
TESS_LOCAL int TextLength(int* blob_count);
|
||||
|
||||
/** @defgroup ocropusAddOns ocropus add-ons */
|
||||
/* @{ */
|
||||
|
||||
/**
|
||||
* Adapt to recognize the current image as the given character.
|
||||
* The image must be preloaded and be just an image of a single character.
|
||||
*/
|
||||
TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
|
||||
int length,
|
||||
float baseline,
|
||||
float xheight,
|
||||
float descender,
|
||||
float ascender);
|
||||
|
||||
/** Recognize text doing one pass only, using settings for a given pass. */
|
||||
TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
|
||||
TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
|
||||
PAGE_RES* pass1_result);
|
||||
|
||||
//// paragraphs.cpp ////////////////////////////////////////////////////
|
||||
TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
|
||||
|
||||
/**
|
||||
* Extract the OCR results, costs (penalty points for uncertainty),
|
||||
* and the bounding boxes of the characters.
|
||||
*/
|
||||
TESS_LOCAL static int TesseractExtractResult(char** text,
|
||||
int** lengths,
|
||||
float** costs,
|
||||
int** x0,
|
||||
int** y0,
|
||||
int** x1,
|
||||
int** y1,
|
||||
PAGE_RES* page_res);
|
||||
|
||||
TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
|
||||
/* @} */
|
||||
|
||||
|
||||
protected:
|
||||
Tesseract* tesseract_; ///< The underlying data object.
|
||||
Tesseract* osd_tesseract_; ///< For orientation & script detection.
|
||||
EquationDetect* equ_detect_; ///<The equation detector.
|
||||
ImageThresholder* thresholder_; ///< Image thresholding module.
|
||||
GenericVector<ParagraphModel *>* paragraph_models_;
|
||||
BLOCK_LIST* block_list_; ///< The page layout.
|
||||
PAGE_RES* page_res_; ///< The page-level data.
|
||||
STRING* input_file_; ///< Name used by training code.
|
||||
STRING* output_file_; ///< Name used by debug code.
|
||||
STRING* datapath_; ///< Current location of tessdata.
|
||||
STRING* language_; ///< Last initialized language.
|
||||
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
|
||||
bool recognition_done_; ///< page_res_ contains recognition data.
|
||||
TruthCallback *truth_cb_; /// fxn for setting truth_* in WERD_RES
|
||||
|
||||
/**
|
||||
* @defgroup ThresholderParams Thresholder Parameters
|
||||
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
|
||||
*/
|
||||
/* @{ */
|
||||
int rect_left_;
|
||||
int rect_top_;
|
||||
int rect_width_;
|
||||
int rect_height_;
|
||||
int image_width_;
|
||||
int image_height_;
|
||||
/* @} */
|
||||
|
||||
private:
|
||||
// A list of image filenames gets special consideration
|
||||
bool ProcessPagesFileList(FILE *fp,
|
||||
STRING *buf,
|
||||
const char* retry_config, int timeout_millisec,
|
||||
TessResultRenderer* renderer,
|
||||
int tessedit_page_number);
|
||||
// TIFF supports multipage so gets special consideration.
|
||||
bool ProcessPagesMultipageTiff(const unsigned char *data,
|
||||
size_t size,
|
||||
const char* filename,
|
||||
const char* retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer* renderer,
|
||||
int tessedit_page_number);
|
||||
// There's currently no way to pass a document title from the
|
||||
// Tesseract command line, and we have multiple places that choose
|
||||
// to set the title to an empty string. Using a single named
|
||||
// variable will hopefully reduce confusion if the situation changes
|
||||
// in the future.
|
||||
const char *unknown_title_;
|
||||
}; // class TessBaseAPI.
|
||||
|
||||
/** Escape a char string - remove &<>"' with HTML codes. */
|
||||
STRING HOcrEscape(const char* text);
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_API_BASEAPI_H__
|
|
@ -0,0 +1,826 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: capi.cpp
|
||||
// Description: C-API TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2012, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESS_CAPI_INCLUDE_BASEAPI
|
||||
# define TESS_CAPI_INCLUDE_BASEAPI
|
||||
#endif
|
||||
#include "capi.h"
|
||||
#include "genericvector.h"
|
||||
#include "strngs.h"
|
||||
|
||||
/** Version string of the linked Tesseract library. */
TESS_API const char* TESS_CALL TessVersion()
{
    return TessBaseAPI::Version();
}

/** Release a UTF-8 text buffer allocated by the library with new[]. */
TESS_API void TESS_CALL TessDeleteText(char* text)
{
    delete[] text;
}

/**
 * Release a NULL-terminated array of library-allocated strings.
 *
 * Fix: the original dereferenced `arr` unconditionally, so passing NULL was
 * undefined behavior — inconsistent with the sibling deleters, for which
 * delete[] NULL is a harmless no-op. A NULL argument is now tolerated.
 */
TESS_API void TESS_CALL TessDeleteTextArray(char** arr)
{
    if (arr == NULL)
        return;  // match delete[] semantics: deleting NULL is a no-op
    for (char** pos = arr; *pos != NULL; ++pos)
        delete[] * pos;
    delete[] arr;
}

/** Release an int array allocated by the library with new[]. */
TESS_API void TESS_CALL TessDeleteIntArray(int* arr)
{
    delete[] arr;
}

/** Release a block list via the C++ API's opaque deleter. */
TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list)
{
    TessBaseAPI::DeleteBlockList(block_list);
}
|
||||
|
||||
/** Create a plain-text renderer writing to `outputbase`. */
TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase) {
    TessResultRenderer* renderer = new TessTextRenderer(outputbase);
    return renderer;
}

/** Create an hOCR renderer writing to `outputbase`. */
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase) {
    TessResultRenderer* renderer = new TessHOcrRenderer(outputbase);
    return renderer;
}

/** Create an hOCR renderer with explicit font-info emission. */
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info) {
    TessResultRenderer* renderer = new TessHOcrRenderer(outputbase, font_info);
    return renderer;
}

/** Create a PDF renderer; `datadir` locates the PDF font resources. */
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir) {
    const bool textonly = false;  // embed the page image, not text-only output
    TessResultRenderer* renderer = new TessPDFRenderer(outputbase, datadir, textonly);
    return renderer;
}

/** Create a PDF renderer with an explicit text-only flag. */
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreateTextonly(const char* outputbase, const char* datadir,
    BOOL textonly) {
    TessResultRenderer* renderer = new TessPDFRenderer(outputbase, datadir, textonly);
    return renderer;
}

/** Create a UNLV-format renderer writing to `outputbase`. */
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase) {
    TessResultRenderer* renderer = new TessUnlvRenderer(outputbase);
    return renderer;
}

/** Create a box-text renderer writing to `outputbase`. */
TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase) {
    TessResultRenderer* renderer = new TessBoxTextRenderer(outputbase);
    return renderer;
}
|
||||
|
||||
/** Destroy a renderer created by one of the TESS*RendererCreate calls. */
TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer) {
    delete renderer;
}

/** Append `next` to the renderer's chain. */
TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, TessResultRenderer* next) {
    renderer->insert(next);
}

/** Successor of `renderer` in the chain. */
TESS_API TessResultRenderer* TESS_CALL TessResultRendererNext(TessResultRenderer* renderer) {
    return renderer->next();
}

/** Begin a new output document with the given title. */
TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(TessResultRenderer* renderer, const char* title) {
    return renderer->BeginDocument(title);
}

/** Render the current page held by `api` into the document. */
TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, TessBaseAPI* api) {
    return renderer->AddImage(api, nullptr, 0);
}

/** Finish the output document. */
TESS_API BOOL TESS_CALL TessResultRendererEndDocument(TessResultRenderer* renderer) {
    return renderer->EndDocument();
}

/**
 * File extension used by this renderer.
 * NOTE: the "Extention" spelling is part of the public C API and must stay.
 */
TESS_API const char* TESS_CALL TessResultRendererExtention(TessResultRenderer* renderer) {
    return renderer->file_extension();
}

/** Title of the current output document. */
TESS_API const char* TESS_CALL TessResultRendererTitle(TessResultRenderer* renderer) {
    return renderer->title();
}

/** Number of the image most recently added to the renderer. */
TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer) {
    return renderer->imagenum();
}
|
||||
|
||||
/** Allocate a fresh TessBaseAPI instance. */
TESS_API TessBaseAPI* TESS_CALL TessBaseAPICreate() {
    return new TessBaseAPI;
}

/** Destroy an instance created with TessBaseAPICreate. */
TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle) {
    delete handle;
}

/** Forward to TessBaseAPI::getOpenCLDevice. */
TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void **device) {
    return handle->getOpenCLDevice(device);
}

/** Set the name of the input file (used by training/debug code). */
TESS_API void TESS_CALL TessBaseAPISetInputName(TessBaseAPI* handle, const char* name) {
    handle->SetInputName(name);
}

/** Name of the current input file. */
TESS_API const char* TESS_CALL TessBaseAPIGetInputName(TessBaseAPI* handle) {
    return handle->GetInputName();
}

/** Attach the original (pre-threshold) input image. */
TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle, Pix* pix) {
    handle->SetInputImage(pix);
}

/** The original input image, if one was attached. */
TESS_API Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle) {
    return handle->GetInputImage();
}

/** Y resolution of the source image. */
TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle) {
    return handle->GetSourceYResolution();
}

/** Current tessdata path. */
TESS_API const char* TESS_CALL TessBaseAPIGetDatapath(TessBaseAPI* handle) {
    return handle->GetDatapath();
}

/** Set the name of the bonus output files (used by debug code). */
TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, const char* name) {
    handle->SetOutputName(name);
}
|
||||
|
||||
/** Set a runtime parameter by name; TRUE if the name was found and set. */
TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name, const char* value) {
    const bool ok = handle->SetVariable(name, value);
    return ok ? TRUE : FALSE;
}
|
||||
|
||||
/**
 * Set a debug parameter by name; TRUE if the name was found and set.
 *
 * Fix: this wrapper previously forwarded to SetVariable(), making it an
 * exact duplicate of TessBaseAPISetVariable and bypassing the debug-only
 * parameter handling. It now forwards to SetDebugVariable(), matching the
 * upstream Tesseract C API.
 */
TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name, const char* value)
{
    return handle->SetDebugVariable(name, value) ? TRUE : FALSE;
}
|
||||
|
||||
/** Read an int parameter into *value; TRUE on success. */
TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, const char* name, int* value) {
    return handle->GetIntVariable(name, value) ? TRUE : FALSE;
}

/** Read a bool parameter, translating C++ bool to the C BOOL type. */
TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, const char* name, BOOL* value) {
    bool result;
    if (!handle->GetBoolVariable(name, &result))
        return FALSE;  // lookup failed; *value is left untouched
    *value = result ? TRUE : FALSE;
    return TRUE;
}

/** Read a double parameter into *value; TRUE on success. */
TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value) {
    return handle->GetDoubleVariable(name, value) ? TRUE : FALSE;
}

/** Look up a string parameter by name. */
TESS_API const char* TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name) {
    return handle->GetStringVariable(name);
}

/** Print all parameters to an already-open stream. */
TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, FILE* fp) {
    handle->PrintVariables(fp);
}

/** Print all parameters to the named file; TRUE if the file could be opened. */
TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename) {
    FILE* fp = fopen(filename, "w");
    if (fp == NULL)
        return FALSE;
    handle->PrintVariables(fp);
    fclose(fp);
    return TRUE;
}

/** Fetch a parameter's value as a STRING; TRUE on success. */
TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, const char* name, STRING* val) {
    return handle->GetVariableAsString(name, val) ? TRUE : FALSE;
}
|
||||
|
||||
// Full-control initializer. The parallel (vars_vec, vars_values) C arrays
// are repacked into the GenericVector<STRING> pairs that the C++ Init()
// overload expects; both arrays must be non-NULL for any pair to be used.
TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language,
                                        TessOcrEngineMode mode, char** configs, int configs_size,
                                        char** vars_vec, char** vars_values, size_t vars_vec_size,
                                        BOOL set_only_non_debug_params) {
    GenericVector<STRING> names;
    GenericVector<STRING> values;
    if (vars_vec != NULL && vars_values != NULL) {
        for (size_t i = 0; i < vars_vec_size; ++i) {
            names.push_back(STRING(vars_vec[i]));
            values.push_back(STRING(vars_values[i]));
        }
    }
    return handle->Init(datapath, language, mode, configs, configs_size,
                        &names, &values, set_only_non_debug_params);
}

// Convenience initializers delegating to the same C++ Init() family.
TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath, const char* language,
                                        TessOcrEngineMode oem, char** configs, int configs_size) {
    return handle->Init(datapath, language, oem, configs, configs_size, NULL, NULL, false);
}

TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath, const char* language,
                                        TessOcrEngineMode oem) {
    return handle->Init(datapath, language, oem);
}

TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language) {
    return handle->Init(datapath, language);
}

TESS_API const char* TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle) {
    return handle->GetInitLanguagesAsString();
}
TESS_API char** TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle)
|
||||
{
|
||||
GenericVector<STRING> languages;
|
||||
handle->GetLoadedLanguagesAsVector(&languages);
|
||||
char** arr = new char*[languages.size() + 1];
|
||||
for (int index = 0; index < languages.size(); ++index)
|
||||
arr[index] = languages[index].strdup();
|
||||
arr[languages.size()] = NULL;
|
||||
return arr;
|
||||
}
|
||||
|
||||
TESS_API char** TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle)
|
||||
{
|
||||
GenericVector<STRING> languages;
|
||||
handle->GetAvailableLanguagesAsVector(&languages);
|
||||
char** arr = new char*[languages.size() + 1];
|
||||
for (int index = 0; index < languages.size(); ++index)
|
||||
arr[index] = languages[index].strdup();
|
||||
arr[languages.size()] = NULL;
|
||||
return arr;
|
||||
}
|
||||
|
||||
TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language) {
    return handle->InitLangMod(datapath, language);
}

TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle) {
    handle->InitForAnalysePage();
}

// Config-file loaders (normal and debug variants).
TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, const char* filename) {
    handle->ReadConfigFile(filename);
}

TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, const char* filename) {
    handle->ReadDebugConfigFile(filename);
}

// Page-segmentation-mode accessors.
TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode) {
    handle->SetPageSegMode(mode);
}

TESS_API TessPageSegMode TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle) {
    return handle->GetPageSegMode();
}

// Recognizes a rectangle of raw image data and returns newly allocated text.
TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata,
                                         int bytes_per_pixel, int bytes_per_line,
                                         int left, int top, int width, int height) {
    return handle->TesseractRect(imagedata, bytes_per_pixel, bytes_per_line,
                                 left, top, width, height);
}

TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle) {
    handle->ClearAdaptiveClassifier();
}

// Image-setting entry points: raw bytes or a Pix.
TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata,
                                            int width, int height,
                                            int bytes_per_pixel, int bytes_per_line) {
    handle->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line);
}

TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, struct Pix* pix) {
    handle->SetImage(pix);
}

TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, int ppi) {
    handle->SetSourceResolution(ppi);
}

// Restricts recognition to a sub-rectangle of the image.
TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, int top, int width, int height) {
    handle->SetRectangle(left, top, width, height);
}

TESS_API void TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImageThresholder* thresholder) {
    handle->SetThresholder(thresholder);
}
TESS_API struct Pix* TESS_CALL TessBaseAPIGetThresholdedImage(TessBaseAPI* handle) {
    return handle->GetThresholdedImage();
}

// Layout-component getters: each returns a Boxa of bounding boxes and
// optionally fills the corresponding out-parameters.
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, struct Pixa** pixa) {
    return handle->GetRegions(pixa);
}

TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, struct Pixa** pixa, int** blockids) {
    return handle->GetTextlines(pixa, blockids);
}

TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle,
                                                         const BOOL raw_image, const int raw_padding,
                                                         struct Pixa** pixa, int** blockids, int** paraids) {
    return handle->GetTextlines(raw_image, raw_padding, pixa, blockids, paraids);
}

TESS_API struct Boxa* TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, struct Pixa** pixa, int** blockids) {
    return handle->GetStrips(pixa, blockids);
}

TESS_API struct Boxa* TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, struct Pixa** pixa) {
    return handle->GetWords(pixa);
}

TESS_API struct Boxa* TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc) {
    return handle->GetConnectedComponents(cc);
}

// text_only is normalized from BOOL to a strict C++ bool before forwarding.
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, TessPageIteratorLevel level,
                                                              BOOL text_only, struct Pixa** pixa, int** blockids) {
    return handle->GetComponentImages(level, text_only != FALSE, pixa, blockids);
}

TESS_API struct Boxa*
TESS_CALL TessBaseAPIGetComponentImages1(TessBaseAPI* handle, const TessPageIteratorLevel level,
                                         const BOOL text_only, const BOOL raw_image, const int raw_padding,
                                         struct Pixa** pixa, int** blockids, int** paraids) {
    return handle->GetComponentImages(level, text_only != FALSE, raw_image, raw_padding,
                                      pixa, blockids, paraids);
}

TESS_API int TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle) {
    return handle->GetThresholdedImageScaleFactor();
}

TESS_API void TESS_CALL TessBaseAPIDumpPGM(TessBaseAPI* handle, const char* filename) {
    handle->DumpPGM(filename);
}

// Runs page layout analysis only; caller owns the returned iterator.
TESS_API TessPageIterator* TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle) {
    return handle->AnalyseLayout();
}
// Runs recognition; monitor may carry progress/cancel state.
TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor) {
    return handle->Recognize(monitor);
}

TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ETEXT_DESC* monitor) {
    return handle->RecognizeForChopTest(monitor);
}

// Processes one or more pages from |filename| through |renderer|.
TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename,
                                                const char* retry_config, int timeout_millisec,
                                                TessResultRenderer* renderer) {
    return handle->ProcessPages(filename, retry_config, timeout_millisec, renderer)
               ? TRUE : FALSE;
}

// Processes a single already-loaded page image through |renderer|.
TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix, int page_index,
                                               const char* filename, const char* retry_config,
                                               int timeout_millisec, TessResultRenderer* renderer) {
    return handle->ProcessPage(pix, page_index, filename, retry_config, timeout_millisec,
                               renderer, nullptr, 0)
               ? TRUE : FALSE;
}

// Result-iterator factories; caller owns the returned iterator.
TESS_API TessResultIterator* TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle) {
    return handle->GetIterator();
}

TESS_API TessMutableIterator* TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle) {
    return handle->GetMutableIterator();
}
// Text-output getters. Each returns newly allocated text that the caller
// releases with TessDeleteText().
TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle) {
    return handle->GetUTF8Text();
}

TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number) {
    return handle->GetHOCRText(NULL, page_number);
}

TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number) {
    return handle->GetBoxText(page_number);
}

TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle) {
    return handle->GetUNLVText();
}

TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle) {
    return handle->MeanTextConf();
}

// Returns a newly allocated array of per-word confidences; free with
// TessDeleteIntArray().
TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle) {
    return handle->AllWordConfidences();
}

TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, TessPageSegMode mode, const char* wordstr) {
    if (handle->AdaptToWordStr(mode, wordstr)) {
        return TRUE;
    }
    return FALSE;
}

// Clears recognition results but keeps loaded language data.
TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle) {
    handle->Clear();
}

// Releases all memory held by the instance, including language data.
TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle) {
    handle->End();
}

TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle, const char* word) {
    return handle->IsValidWord(word);
}

TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, int* out_offset, float* out_slope) {
    return handle->GetTextDirection(out_offset, out_slope) ? TRUE : FALSE;
}
TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, TessDictFunc f) {
    handle->SetDictFunc(f);
}

TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle) {
    handle->ClearPersistentCache();
}

TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f) {
    handle->SetProbabilityInContextFunc(f);
}

// Intentionally disabled: OSResults crosses the C ABI boundary unsafely,
// so this wrapper always reports failure. Use
// TessBaseAPIDetectOrientationScript() instead.
TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results) {
    return FALSE;
}

// Safe replacement for TessBaseAPIDetectOS: reports orientation/script via
// plain out-parameters.
TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
                                                           int* orient_deg, float* orient_conf,
                                                           const char** script_name, float* script_conf) {
    return handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf)
               ? TRUE : FALSE;
}

TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob,
                                                      INT_FEATURE_STRUCT* int_features,
                                                      int* num_features, int* FeatureOutlineIndex) {
    handle->GetFeaturesForBlob(blob, int_features, num_features, FeatureOutlineIndex);
}

// Static-method wrapper: locates the text row containing the given box.
TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, int bottom) {
    return TessBaseAPI::FindRowForBox(blocks, left, top, right, bottom);
}
TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, int num_max_matches,
                                                         int* unichar_ids, float* ratings,
                                                         int* num_matches_returned) {
    handle->RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings, num_matches_returned);
}

TESS_API const char* TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id) {
    return handle->GetUnichar(unichar_id);
}

// Dictionary (DAWG) accessors.
TESS_API const TessDawg* TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i) {
    return handle->GetDawg(i);
}

TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle) {
    return handle->NumDawgs();
}

// Static-method wrappers used by training tools.
TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, float descender, float ascender) {
    return TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
}

TESS_API TBLOB* TESS_CALL TessMakeTBLOB(struct Pix* pix) {
    return TessBaseAPI::MakeTBLOB(pix);
}

TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode) {
    TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode != FALSE);
}

TESS_API TessOcrEngineMode TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle) {
    return handle->oem();
}

TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb) {
    handle->InitTruthCallback(cb);
}
#ifndef NO_CUBE_BUILD
|
||||
TESS_API TessCubeRecoContext* TESS_CALL TessBaseAPIGetCubeRecoContext(const TessBaseAPI* handle)
|
||||
{
|
||||
return handle->GetCubeRecoContext();
|
||||
}
|
||||
#endif // NO_CUBE_BUILD
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin)
|
||||
{
|
||||
handle->set_min_orientation_margin(margin);
|
||||
}
|
||||
|
||||
TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, int** block_orientation, bool** vertical_writing)
|
||||
{
|
||||
handle->GetBlockTextOrientations(block_orientation, vertical_writing);
|
||||
}
|
||||
|
||||
TESS_API BLOCK_LIST* TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle)
|
||||
{
|
||||
return handle->FindLinesCreateBlockList();
|
||||
}
|
||||
|
||||
// ---- PageIterator wrappers ----

TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle) {
    delete handle;
}

// Returns a newly allocated copy of the iterator (caller owns it).
TESS_API TessPageIterator* TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle) {
    return new TessPageIterator(*handle);
}

TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle) {
    handle->Begin();
}

// Advances to the next element at |level|; FALSE when iteration is done.
TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, TessPageIteratorLevel level) {
    return handle->Next(level) ? TRUE : FALSE;
}

TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator* handle, TessPageIteratorLevel level) {
    return handle->IsAtBeginningOf(level) ? TRUE : FALSE;
}

TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle,
                                                         TessPageIteratorLevel level,
                                                         TessPageIteratorLevel element) {
    return handle->IsAtFinalElement(level, element) ? TRUE : FALSE;
}

// Writes the bounding box of the current element into the out-parameters;
// FALSE if there is no bounding box at this position.
TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level,
                                                    int* left, int* top, int* right, int* bottom) {
    return handle->BoundingBox(level, left, top, right, bottom) ? TRUE : FALSE;
}

TESS_API TessPolyBlockType TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle) {
    return handle->BlockType();
}
TESS_API struct Pix* TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle,
                                                              TessPageIteratorLevel level) {
    return handle->GetBinaryImage(level);
}

TESS_API struct Pix* TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle,
                                                        TessPageIteratorLevel level, int padding,
                                                        struct Pix* original_image, int* left, int* top) {
    return handle->GetImage(level, padding, original_image, left, top);
}

// Writes the baseline endpoints of the current element; FALSE if unavailable.
TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level,
                                                 int* x1, int* y1, int* x2, int* y2) {
    return handle->Baseline(level, x1, y1, x2, y2) ? TRUE : FALSE;
}

TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation* orientation,
                                                    TessWritingDirection* writing_direction,
                                                    TessTextlineOrder* textline_order,
                                                    float* deskew_angle) {
    handle->Orientation(orientation, writing_direction, textline_order, deskew_angle);
}

// Bridges the C++ bool out-parameters to C BOOLs; NULL out-pointers are
// allowed and simply skipped.
TESS_API void TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle,
                                                      TessParagraphJustification* justification,
                                                      BOOL* is_list_item, BOOL* is_crown,
                                                      int* first_line_indent) {
    bool list_item = false;
    bool crown = false;
    handle->ParagraphInfo(justification, &list_item, &crown, first_line_indent);
    if (is_list_item != NULL) {
        *is_list_item = list_item ? TRUE : FALSE;
    }
    if (is_crown != NULL) {
        *is_crown = crown ? TRUE : FALSE;
    }
}
// ---- ResultIterator wrappers ----

TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle) {
    delete handle;
}

TESS_API TessResultIterator* TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle) {
    return new TessResultIterator(*handle);
}

// A ResultIterator is-a PageIterator, so these are plain upcasts.
TESS_API TessPageIterator* TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle) {
    return handle;
}

TESS_API const TessPageIterator* TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle) {
    return handle;
}

// Caller owns the returned choice iterator.
TESS_API TessChoiceIterator* TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle) {
    return new TessChoiceIterator(*handle);
}

TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level) {
    return handle->Next(level) ? TRUE : FALSE;
}
// Returns newly allocated text for the current element at |level|; free
// with TessDeleteText().
TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle,
                                                       TessPageIteratorLevel level) {
    return handle->GetUTF8Text(level);
}

TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle,
                                                      TessPageIteratorLevel level) {
    return handle->Confidence(level);
}

TESS_API const char* TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle) {
    return handle->WordRecognitionLanguage();
}

// Bridges the six C++ bool out-parameters to C BOOLs; NULL out-pointers
// are allowed and skipped.
TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle,
                                                                    BOOL* is_bold, BOOL* is_italic,
                                                                    BOOL* is_underlined, BOOL* is_monospace,
                                                                    BOOL* is_serif, BOOL* is_smallcaps,
                                                                    int* pointsize, int* font_id) {
    bool bold = false, italic = false, underlined = false;
    bool monospace = false, serif = false, smallcaps = false;
    const char* font_name = handle->WordFontAttributes(&bold, &italic, &underlined,
                                                       &monospace, &serif, &smallcaps,
                                                       pointsize, font_id);
    if (is_bold != NULL) {
        *is_bold = bold ? TRUE : FALSE;
    }
    if (is_italic != NULL) {
        *is_italic = italic ? TRUE : FALSE;
    }
    if (is_underlined != NULL) {
        *is_underlined = underlined ? TRUE : FALSE;
    }
    if (is_monospace != NULL) {
        *is_monospace = monospace ? TRUE : FALSE;
    }
    if (is_serif != NULL) {
        *is_serif = serif ? TRUE : FALSE;
    }
    if (is_smallcaps != NULL) {
        *is_smallcaps = smallcaps ? TRUE : FALSE;
    }
    return font_name;
}
// Word/symbol predicate wrappers: each converts the C++ bool to BOOL.
TESS_API BOOL TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle) {
    return handle->WordIsFromDictionary() ? TRUE : FALSE;
}

TESS_API BOOL TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle) {
    return handle->WordIsNumeric() ? TRUE : FALSE;
}

TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle) {
    return handle->SymbolIsSuperscript() ? TRUE : FALSE;
}

TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle) {
    return handle->SymbolIsSubscript() ? TRUE : FALSE;
}

TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle) {
    return handle->SymbolIsDropcap() ? TRUE : FALSE;
}

// ---- ChoiceIterator wrappers ----

TESS_API void TESS_CALL TessChoiceIteratorDelete(TessChoiceIterator* handle) {
    delete handle;
}

TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle) {
    return handle->Next() ? TRUE : FALSE;
}

TESS_API const char* TESS_CALL TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle) {
    return handle->GetUTF8Text();
}

TESS_API float TESS_CALL TessChoiceIteratorConfidence(const TessChoiceIterator* handle) {
    return handle->Confidence();
}
@ -0,0 +1,409 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: capi.h
|
||||
// Description: C-API TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2012, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef API_CAPI_H_
|
||||
#define API_CAPI_H_
|
||||
|
||||
#ifdef TESS_CAPI_INCLUDE_BASEAPI
|
||||
# include "baseapi.h"
|
||||
# include "pageiterator.h"
|
||||
# include "resultiterator.h"
|
||||
# include "renderer.h"
|
||||
#else
|
||||
# include "platform.h"
|
||||
# include <stdio.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef TESS_CALL
|
||||
# if defined(WIN32)
|
||||
# define TESS_CALL __cdecl
|
||||
# else
|
||||
# define TESS_CALL
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef BOOL
|
||||
# define BOOL int
|
||||
# define TRUE 1
|
||||
# define FALSE 0
|
||||
#endif
|
||||
|
||||
#ifdef TESS_CAPI_INCLUDE_BASEAPI
|
||||
typedef tesseract::TessResultRenderer TessResultRenderer;
|
||||
typedef tesseract::TessTextRenderer TessTextRenderer;
|
||||
typedef tesseract::TessHOcrRenderer TessHOcrRenderer;
|
||||
typedef tesseract::TessPDFRenderer TessPDFRenderer;
|
||||
typedef tesseract::TessUnlvRenderer TessUnlvRenderer;
|
||||
typedef tesseract::TessBoxTextRenderer TessBoxTextRenderer;
|
||||
typedef tesseract::TessBaseAPI TessBaseAPI;
|
||||
typedef tesseract::PageIterator TessPageIterator;
|
||||
typedef tesseract::ResultIterator TessResultIterator;
|
||||
typedef tesseract::MutableIterator TessMutableIterator;
|
||||
typedef tesseract::ChoiceIterator TessChoiceIterator;
|
||||
typedef tesseract::OcrEngineMode TessOcrEngineMode;
|
||||
typedef tesseract::PageSegMode TessPageSegMode;
|
||||
typedef tesseract::ImageThresholder TessImageThresholder;
|
||||
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
|
||||
typedef tesseract::DictFunc TessDictFunc;
|
||||
typedef tesseract::ProbabilityInContextFunc TessProbabilityInContextFunc;
|
||||
// typedef tesseract::ParamsModelClassifyFunc TessParamsModelClassifyFunc;
|
||||
typedef tesseract::FillLatticeFunc TessFillLatticeFunc;
|
||||
typedef tesseract::Dawg TessDawg;
|
||||
typedef tesseract::TruthCallback TessTruthCallback;
|
||||
#ifndef NO_CUBE_BUILD
|
||||
typedef tesseract::CubeRecoContext TessCubeRecoContext;
|
||||
#endif // NO_CUBE_BUILD
|
||||
typedef tesseract::Orientation TessOrientation;
|
||||
typedef tesseract::ParagraphJustification TessParagraphJustification;
|
||||
typedef tesseract::WritingDirection TessWritingDirection;
|
||||
typedef tesseract::TextlineOrder TessTextlineOrder;
|
||||
typedef PolyBlockType TessPolyBlockType;
|
||||
#else
|
||||
typedef struct TessResultRenderer TessResultRenderer;
|
||||
typedef struct TessTextRenderer TessTextRenderer;
|
||||
typedef struct TessHOcrRenderer TessHOcrRenderer;
|
||||
typedef struct TessPDFRenderer TessPDFRenderer;
|
||||
typedef struct TessUnlvRenderer TessUnlvRenderer;
|
||||
typedef struct TessBoxTextRenderer TessBoxTextRenderer;
|
||||
typedef struct TessBaseAPI TessBaseAPI;
|
||||
typedef struct TessPageIterator TessPageIterator;
|
||||
typedef struct TessResultIterator TessResultIterator;
|
||||
typedef struct TessMutableIterator TessMutableIterator;
|
||||
typedef struct TessChoiceIterator TessChoiceIterator;
|
||||
typedef enum TessOcrEngineMode { OEM_TESSERACT_ONLY, OEM_CUBE_ONLY, OEM_TESSERACT_CUBE_COMBINED, OEM_DEFAULT } TessOcrEngineMode;
|
||||
typedef enum TessPageSegMode { PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO, PSM_SINGLE_COLUMN, PSM_SINGLE_BLOCK_VERT_TEXT,
|
||||
PSM_SINGLE_BLOCK, PSM_SINGLE_LINE, PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_SPARSE_TEXT,
|
||||
PSM_SPARSE_TEXT_OSD, PSM_COUNT } TessPageSegMode;
|
||||
typedef enum TessPageIteratorLevel { RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL} TessPageIteratorLevel;
|
||||
typedef enum TessPolyBlockType { PT_UNKNOWN, PT_FLOWING_TEXT, PT_HEADING_TEXT, PT_PULLOUT_TEXT, PT_EQUATION, PT_INLINE_EQUATION,
|
||||
PT_TABLE, PT_VERTICAL_TEXT, PT_CAPTION_TEXT, PT_FLOWING_IMAGE, PT_HEADING_IMAGE,
|
||||
PT_PULLOUT_IMAGE, PT_HORZ_LINE, PT_VERT_LINE, PT_NOISE, PT_COUNT } TessPolyBlockType;
|
||||
typedef enum TessOrientation { ORIENTATION_PAGE_UP, ORIENTATION_PAGE_RIGHT, ORIENTATION_PAGE_DOWN, ORIENTATION_PAGE_LEFT } TessOrientation;
|
||||
typedef enum TessParagraphJustification { JUSTIFICATION_UNKNOWN, JUSTIFICATION_LEFT, JUSTIFICATION_CENTER, JUSTIFICATION_RIGHT } TessParagraphJustification;
|
||||
typedef enum TessWritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT, WRITING_DIRECTION_RIGHT_TO_LEFT, WRITING_DIRECTION_TOP_TO_BOTTOM } TessWritingDirection;
|
||||
typedef enum TessTextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT, TEXTLINE_ORDER_RIGHT_TO_LEFT, TEXTLINE_ORDER_TOP_TO_BOTTOM } TessTextlineOrder;
|
||||
typedef struct ETEXT_DESC ETEXT_DESC;
|
||||
#endif
|
||||
|
||||
struct Pix;
|
||||
struct Boxa;
|
||||
struct Pixa;
|
||||
|
||||
/* General free functions */
|
||||
|
||||
TESS_API const char*
|
||||
TESS_CALL TessVersion();
|
||||
TESS_API void TESS_CALL TessDeleteText(char* text);
|
||||
TESS_API void TESS_CALL TessDeleteTextArray(char** arr);
|
||||
TESS_API void TESS_CALL TessDeleteIntArray(int* arr);
|
||||
#ifdef TESS_CAPI_INCLUDE_BASEAPI
|
||||
TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list);
|
||||
#endif
|
||||
|
||||
/* Renderer API */
|
||||
TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreateTextonly(const char* outputbase, const char* datadir,
|
||||
BOOL textonly);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase);
|
||||
|
||||
TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer);
|
||||
TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, TessResultRenderer* next);
|
||||
TESS_API TessResultRenderer*
|
||||
TESS_CALL TessResultRendererNext(TessResultRenderer* renderer);
|
||||
TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(TessResultRenderer* renderer, const char* title);
|
||||
TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, TessBaseAPI* api);
|
||||
TESS_API BOOL TESS_CALL TessResultRendererEndDocument(TessResultRenderer* renderer);
|
||||
|
||||
TESS_API const char* TESS_CALL TessResultRendererExtention(TessResultRenderer* renderer);
|
||||
TESS_API const char* TESS_CALL TessResultRendererTitle(TessResultRenderer* renderer);
|
||||
TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer);
|
||||
|
||||
/* Base API */
|
||||
|
||||
TESS_API TessBaseAPI*
|
||||
TESS_CALL TessBaseAPICreate();
|
||||
TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle);
|
||||
|
||||
TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void **device);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPISetInputName( TessBaseAPI* handle, const char* name);
|
||||
TESS_API const char* TESS_CALL TessBaseAPIGetInputName(TessBaseAPI* handle);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle, struct Pix* pix);
|
||||
TESS_API struct Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle);
|
||||
|
||||
TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle);
|
||||
TESS_API const char* TESS_CALL TessBaseAPIGetDatapath(TessBaseAPI* handle);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, const char* name);
|
||||
|
||||
TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name, const char* value);
|
||||
TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name, const char* value);
|
||||
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable( const TessBaseAPI* handle, const char* name, int* value);
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable( const TessBaseAPI* handle, const char* name, BOOL* value);
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value);
|
||||
TESS_API const char*
|
||||
TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPIPrintVariables( const TessBaseAPI* handle, FILE* fp);
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename);
|
||||
#ifdef TESS_CAPI_INCLUDE_BASEAPI
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, const char* name, STRING* val);
|
||||
#endif
|
||||
|
||||
#ifdef TESS_CAPI_INCLUDE_BASEAPI
|
||||
TESS_API int TESS_CALL TessBaseAPIInit(TessBaseAPI* handle, const char* datapath, const char* language,
|
||||
TessOcrEngineMode mode, char** configs, int configs_size,
|
||||
const STRING* vars_vec, size_t vars_vec_size,
|
||||
const STRING* vars_values, size_t vars_values_size, BOOL set_only_init_params);
|
||||
#endif
|
||||
TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem,
|
||||
char** configs, int configs_size);
|
||||
TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem);
|
||||
TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language);
|
||||
|
||||
TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode mode,
|
||||
char** configs, int configs_size,
|
||||
char** vars_vec, char** vars_values, size_t vars_vec_size,
|
||||
BOOL set_only_non_debug_params);
|
||||
|
||||
TESS_API const char*
|
||||
TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle);
|
||||
TESS_API char**
|
||||
TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle);
|
||||
TESS_API char**
|
||||
TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle);
|
||||
|
||||
TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language);
|
||||
TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, const char* filename);
|
||||
TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, const char* filename);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode);
|
||||
TESS_API TessPageSegMode
|
||||
TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle);
|
||||
|
||||
TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata,
|
||||
int bytes_per_pixel, int bytes_per_line,
|
||||
int left, int top, int width, int height);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata, int width, int height,
|
||||
int bytes_per_pixel, int bytes_per_line);
|
||||
TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, struct Pix* pix);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, int ppi);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, int top, int width, int height);
|
||||
|
||||
#ifdef TESS_CAPI_INCLUDE_BASEAPI
|
||||
TESS_API void TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImageThresholder* thresholder);
|
||||
#endif
|
||||
|
||||
TESS_API struct Pix*
|
||||
TESS_CALL TessBaseAPIGetThresholdedImage( TessBaseAPI* handle);
|
||||
TESS_API struct Boxa*
|
||||
TESS_CALL TessBaseAPIGetRegions( TessBaseAPI* handle, struct Pixa** pixa);
|
||||
TESS_API struct Boxa*
|
||||
TESS_CALL TessBaseAPIGetTextlines( TessBaseAPI* handle, struct Pixa** pixa, int** blockids);
|
||||
TESS_API struct Boxa*
|
||||
TESS_CALL TessBaseAPIGetTextlines1( TessBaseAPI* handle, const BOOL raw_image, const int raw_padding,
|
||||
struct Pixa** pixa, int** blockids, int** paraids);
|
||||
TESS_API struct Boxa*
|
||||
TESS_CALL TessBaseAPIGetStrips( TessBaseAPI* handle, struct Pixa** pixa, int** blockids);
|
||||
TESS_API struct Boxa*
|
||||
TESS_CALL TessBaseAPIGetWords( TessBaseAPI* handle, struct Pixa** pixa);
|
||||
TESS_API struct Boxa*
|
||||
TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc);
|
||||
TESS_API struct Boxa*
|
||||
TESS_CALL TessBaseAPIGetComponentImages( TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only,
|
||||
struct Pixa** pixa, int** blockids);
|
||||
TESS_API struct Boxa*
|
||||
TESS_CALL TessBaseAPIGetComponentImages1( TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only,
|
||||
const BOOL raw_image, const int raw_padding,
|
||||
struct Pixa** pixa, int** blockids, int** paraids);
|
||||
|
||||
TESS_API int TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPIDumpPGM(TessBaseAPI* handle, const char* filename);
|
||||
|
||||
TESS_API TessPageIterator*
|
||||
TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle);
|
||||
|
||||
TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor);
|
||||
TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ETEXT_DESC* monitor);
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename, const char* retry_config,
|
||||
int timeout_millisec, TessResultRenderer* renderer);
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix, int page_index, const char* filename,
|
||||
const char* retry_config, int timeout_millisec, TessResultRenderer* renderer);
|
||||
|
||||
TESS_API TessResultIterator*
|
||||
TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle);
|
||||
TESS_API TessMutableIterator*
|
||||
TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle);
|
||||
|
||||
TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle);
|
||||
TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number);
|
||||
TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number);
|
||||
TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle);
|
||||
TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle);
|
||||
TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle);
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, TessPageSegMode mode, const char* wordstr);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle);
|
||||
TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle);
|
||||
|
||||
TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle, const char* word);
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, int* out_offset, float* out_slope);
|
||||
|
||||
#ifdef TESS_CAPI_INCLUDE_BASEAPI
|
||||
TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, TessDictFunc f);
|
||||
TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle);
|
||||
TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, TessFillLatticeFunc f);
|
||||
|
||||
// Deprecated, no longer working
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results);
|
||||
|
||||
// Call TessDeleteText(*best_script_name) to free memory allocated by this function
|
||||
TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
|
||||
int* orient_deg, float* orient_conf, const char **script_name, float* script_conf);
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
|
||||
int* num_features, int* FeatureOutlineIndex);
|
||||
|
||||
TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, int bottom);
|
||||
TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, int num_max_matches,
|
||||
int* unichar_ids, float* ratings, int* num_matches_returned);
|
||||
#endif
|
||||
|
||||
TESS_API const char*
|
||||
TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id);
|
||||
|
||||
#ifdef TESS_CAPI_INCLUDE_BASEAPI
|
||||
TESS_API const TessDawg*
|
||||
TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i);
|
||||
TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle);
|
||||
#endif
|
||||
|
||||
#ifdef TESS_CAPI_INCLUDE_BASEAPI
|
||||
TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, float descender, float ascender);
|
||||
TESS_API TBLOB*
|
||||
TESS_CALL TessMakeTBLOB(Pix* pix);
|
||||
TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode);
|
||||
|
||||
TESS_API TessOcrEngineMode
|
||||
TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle);
|
||||
TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb);
|
||||
|
||||
#ifndef NO_CUBE_BUILD
|
||||
TESS_API TessCubeRecoContext*
|
||||
TESS_CALL TessBaseAPIGetCubeRecoContext(const TessBaseAPI* handle);
|
||||
#endif // NO_CUBE_BUILD
|
||||
#endif
|
||||
|
||||
TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin);
|
||||
#ifdef TESS_CAPI_INCLUDE_BASEAPI
|
||||
TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, int** block_orientation, BOOL** vertical_writing);
|
||||
|
||||
TESS_API BLOCK_LIST*
|
||||
TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle);
|
||||
#endif
|
||||
|
||||
/* Page iterator */
|
||||
|
||||
TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle);
|
||||
TESS_API TessPageIterator*
|
||||
TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle);
|
||||
|
||||
TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle);
|
||||
TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, TessPageIteratorLevel level);
|
||||
TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator* handle, TessPageIteratorLevel level);
|
||||
TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, TessPageIteratorLevel level,
|
||||
TessPageIteratorLevel element);
|
||||
|
||||
TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level,
|
||||
int* left, int* top, int* right, int* bottom);
|
||||
TESS_API TessPolyBlockType
|
||||
TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle);
|
||||
|
||||
TESS_API struct Pix*
|
||||
TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level);
|
||||
TESS_API struct Pix*
|
||||
TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding,
|
||||
struct Pix* original_image, int* left, int* top);
|
||||
|
||||
TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level,
|
||||
int* x1, int* y1, int* x2, int* y2);
|
||||
|
||||
TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation* orientation,
|
||||
TessWritingDirection* writing_direction, TessTextlineOrder* textline_order,
|
||||
float* deskew_angle);
|
||||
|
||||
TESS_API void TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle, TessParagraphJustification* justification,
|
||||
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
|
||||
|
||||
/* Result iterator */
|
||||
|
||||
TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle);
|
||||
TESS_API TessResultIterator*
|
||||
TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle);
|
||||
TESS_API TessPageIterator*
|
||||
TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle);
|
||||
TESS_API const TessPageIterator*
|
||||
TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle);
|
||||
TESS_API TessChoiceIterator*
|
||||
TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle);
|
||||
|
||||
TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level);
|
||||
TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level);
|
||||
TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level);
|
||||
TESS_API const char*
|
||||
TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle);
|
||||
TESS_API const char*
|
||||
TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
|
||||
BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif,
|
||||
BOOL* is_smallcaps, int* pointsize, int* font_id);
|
||||
|
||||
TESS_API BOOL TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle);
|
||||
TESS_API BOOL TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle);
|
||||
TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle);
|
||||
TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle);
|
||||
TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle);
|
||||
|
||||
TESS_API void TESS_CALL TessChoiceIteratorDelete(TessChoiceIterator* handle);
|
||||
TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle);
|
||||
TESS_API const char* TESS_CALL TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle);
|
||||
TESS_API float TESS_CALL TessChoiceIteratorConfidence(const TessChoiceIterator* handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // API_CAPI_H_
|
|
@ -0,0 +1,9 @@
|
|||
/* Build configuration: which image-format support libraries are
 * available (1 = enabled, 0 = disabled).
 * NOTE(review): these look like Leptonica's environ settings — confirm
 * against the library version actually linked. */
#define HAVE_LIBJPEG 1
#define HAVE_LIBTIFF 1
#define HAVE_LIBPNG 1
#define HAVE_LIBZ 1
#define HAVE_LIBGIF 1
#define HAVE_LIBUNGIF 0
#define HAVE_LIBWEBP 1
#define HAVE_LIBJP2K 1
/* Header to include for JPEG-2000 (OpenJPEG) support. */
#define LIBJP2K_HEADER <openjpeg.h>
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,283 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: renderer.cpp
|
||||
// Description: Rendering interface to inject into TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
#include "baseapi.h"
|
||||
#include "genericvector.h"
|
||||
#include "renderer.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**********************************************************************
|
||||
* Base Renderer interface implementation
|
||||
**********************************************************************/
|
||||
// Opens the output target for this renderer: stdout when outputbase is
// "-" or "stdout", otherwise "<outputbase>.<extension>" opened for binary
// writing. On fopen failure happy_ is cleared so later rendering calls
// become no-ops instead of writing to a bad stream.
TessResultRenderer::TessResultRenderer(const char *outputbase,
                                       const char* extension)
    : file_extension_(extension),
      title_(""), imagenum_(-1),
      fout_(stdout),
      next_(NULL),
      happy_(true) {
  if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
    STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
    fout_ = fopen(outfile.string(), "wb");
    if (fout_ == NULL) {
      happy_ = false;
    }
  }
}

// Closes the output file if one was opened; stdout is never closed, its
// error indicator is just reset. Also destroys the rest of the renderer
// chain (next_ is owned — see insert()).
TessResultRenderer::~TessResultRenderer() {
  if (fout_ != NULL) {
    if (fout_ != stdout)
      fclose(fout_);
    else
      clearerr(fout_);
  }
  delete next_;
}
|
||||
|
||||
// Splices the given renderer chain in directly after this renderer.
// Whatever chain was previously attached via next_ is re-attached after
// the LAST element of the inserted chain, so no renderer is dropped.
// Takes ownership of the inserted chain (freed in the destructor).
void TessResultRenderer::insert(TessResultRenderer* next) {
  if (next == NULL) {
    return;
  }

  TessResultRenderer* old_tail = next_;
  next_ = next;
  if (old_tail != NULL) {
    // Walk to the end of the inserted chain and hang the old chain there.
    TessResultRenderer* walker = next;
    while (walker->next_ != NULL) {
      walker = walker->next_;
    }
    walker->next_ = old_tail;
  }
}
|
||||
|
||||
// Starts a new output document with the given UTF-8 title, resets the
// page counter, and forwards the call down the renderer chain.
// Returns true only if this renderer and every chained renderer succeed.
bool TessResultRenderer::BeginDocument(const char* title) {
  if (!happy_) return false;
  title_ = title;
  imagenum_ = -1;
  bool ok = BeginDocumentHandler();
  if (next_) {
    ok = next_->BeginDocument(title) && ok;
  }
  return ok;
}

// Renders one page through this renderer and every chained renderer.
// The page counter is advanced even when a handler fails, so imagenum()
// always reflects the number of AddImage calls made.
bool TessResultRenderer::AddImage(TessBaseAPI* api, const char* jpgdata, int len) {
  if (!happy_) return false;
  ++imagenum_;
  bool ok = AddImageHandler(api, jpgdata, len);
  if (next_) {
    ok = next_->AddImage(api, jpgdata, len) && ok;
  }
  return ok;
}

// Finalizes the document for this renderer and every renderer chained
// after it; returns true only if all of them succeed.
bool TessResultRenderer::EndDocument() {
  if (!happy_) return false;
  bool ok = EndDocumentHandler();
  if (next_) {
    ok = next_->EndDocument() && ok;
  }
  return ok;
}
|
||||
|
||||
void TessResultRenderer::AppendString(const char* s) {
|
||||
AppendData(s, strlen(s));
|
||||
}
|
||||
|
||||
void TessResultRenderer::AppendData(const char* s, int len) {
|
||||
int n = fwrite(s, 1, len, fout_);
|
||||
if (n != len) happy_ = false;
|
||||
}
|
||||
|
||||
// Default BeginDocument hook: nothing to emit, just report stream health.
bool TessResultRenderer::BeginDocumentHandler() {
  return happy_;
}

// Default EndDocument hook: nothing to emit, just report stream health.
bool TessResultRenderer::EndDocumentHandler() {
  return happy_;
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* UTF8 Text Renderer interface implementation
|
||||
**********************************************************************/
|
||||
TessTextRenderer::TessTextRenderer(const char *outputbase)
|
||||
: TessResultRenderer(outputbase, "txt") {
|
||||
}
|
||||
|
||||
bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
|
||||
char* utf8 = api->GetUTF8Text();
|
||||
if (utf8 == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AppendString(utf8);
|
||||
delete[] utf8;
|
||||
|
||||
bool pageBreak = false;
|
||||
api->GetBoolVariable("include_page_breaks", &pageBreak);
|
||||
const char* pageSeparator = api->GetStringVariable("page_separator");
|
||||
if (pageBreak) {
|
||||
AppendString(pageSeparator);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* HOcr Text Renderer interface implementation
|
||||
**********************************************************************/
|
||||
// hOCR renderer; writes to "<outputbase>.hocr" with font info disabled.
TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
    : TessResultRenderer(outputbase, "hocr") {
  font_info_ = false;
}

// hOCR renderer with explicit control over font-information output.
TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
    : TessResultRenderer(outputbase, "hocr") {
  font_info_ = font_info;
}

// Emits the XHTML prologue: doctype, <head> with the document title and
// ocr-system / ocr-capabilities meta tags, and the opening <body>.
bool TessHOcrRenderer::BeginDocumentHandler() {
  AppendString(
      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
      "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
      " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
      "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
      "lang=\"en\">\n <head>\n <title>");
  AppendString(title());
  AppendString(
      "</title>\n"
      "<meta http-equiv=\"Content-Type\" content=\"text/html;"
      "charset=utf-8\" />\n"
      " <meta name='ocr-system' content='tesseract " TESSERACT_VERSION_STR
      "' />\n"
      " <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
      " ocr_line ocrx_word");
  // Advertise the extra per-word attributes only when font info is on.
  if (font_info_)
    AppendString(
        " ocrp_lang ocrp_dir ocrp_font ocrp_fsize ocrp_wconf");
  AppendString(
      "'/>\n"
      "</head>\n<body>\n");

  return true;
}

// Closes the <body> and <html> elements opened in BeginDocumentHandler().
bool TessHOcrRenderer::EndDocumentHandler() {
  AppendString(" </body>\n</html>\n");

  return true;
}

// Emits one page of hOCR markup; returns false if extraction failed.
bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
  char* hocr = api->GetHOCRText(imagenum());
  if (hocr == NULL) return false;

  AppendString(hocr);
  delete[] hocr;

  return true;
}
|
||||
|
||||
/**********************************************************************
|
||||
* TSV Text Renderer interface implementation
|
||||
**********************************************************************/
|
||||
// TSV renderer; writes to "<outputbase>.tsv" with font info disabled.
TessTsvRenderer::TessTsvRenderer(const char* outputbase)
    : TessResultRenderer(outputbase, "tsv") {
  font_info_ = false;
}

// TSV renderer with explicit control over font-information output.
// NOTE(review): font_info_ is stored but not read in the visible
// handlers — confirm whether GetTSVText honors it elsewhere.
TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
    : TessResultRenderer(outputbase, "tsv") {
  font_info_ = font_info;
}

bool TessTsvRenderer::BeginDocumentHandler() {
  // Output TSV column headings
  AppendString(
      "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
      "num\tleft\ttop\twidth\theight\tconf\ttext\n");
  return true;
}

// TSV needs no document trailer.
bool TessTsvRenderer::EndDocumentHandler() { return true; }

// Emits one page of TSV rows; returns false if extraction failed.
bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
  char* tsv = api->GetTSVText(imagenum());
  if (tsv == NULL) return false;

  AppendString(tsv);
  delete[] tsv;

  return true;
}
|
||||
|
||||
/**********************************************************************
|
||||
* UNLV Text Renderer interface implementation
|
||||
**********************************************************************/
|
||||
// UNLV-format renderer; writes to "<outputbase>.unlv".
TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
    : TessResultRenderer(outputbase, "unlv") {
}

// Emits the page's UNLV-format text; returns false if extraction failed.
bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
  char* unlv = api->GetUNLVText();
  if (unlv == NULL) return false;

  AppendString(unlv);
  delete[] unlv;

  return true;
}
|
||||
|
||||
/**********************************************************************
|
||||
* BoxText Renderer interface implementation
|
||||
**********************************************************************/
|
||||
// Box-file renderer; writes to "<outputbase>.box".
TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
    : TessResultRenderer(outputbase, "box") {
}

// Emits the page's box text (per-character bounding boxes); returns
// false if extraction failed.
bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
  char* text = api->GetBoxText(imagenum());
  if (text == NULL) return false;

  AppendString(text);
  delete[] text;

  return true;
}
|
||||
|
||||
/**********************************************************************
|
||||
* Osd Text Renderer interface implementation
|
||||
**********************************************************************/
|
||||
// Orientation/script-detection renderer; writes to "<outputbase>.osd".
TessOsdRenderer::TessOsdRenderer(const char* outputbase)
    : TessResultRenderer(outputbase, "osd") {}

// Emits the page's OSD text; returns false if extraction failed.
bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
  char* osd = api->GetOsdText(imagenum());
  if (osd == NULL) return false;

  AppendString(osd);
  delete[] osd;

  return true;
}
|
||||
|
||||
} // namespace tesseract
|
|
@ -0,0 +1,271 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: renderer.h
|
||||
// Description: Rendering interface to inject into TessBaseAPI
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_API_RENDERER_H_
|
||||
#define TESSERACT_API_RENDERER_H_
|
||||
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
// and hide includes of complex types in baseapi.cpp.
|
||||
#include "genericvector.h"
|
||||
#include "platform.h"
|
||||
#include "publictypes.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class TessBaseAPI;
|
||||
|
||||
/**
|
||||
* Interface for rendering tesseract results into a document, such as text,
|
||||
* HOCR or pdf. This class is abstract. Specific classes handle individual
|
||||
* formats. This interface is then used to inject the renderer class into
|
||||
* tesseract when processing images.
|
||||
*
|
||||
* For simplicity implementing this with tesesract version 3.01,
|
||||
* the renderer contains document state that is cleared from document
|
||||
* to document just as the TessBaseAPI is. This way the base API can just
|
||||
* delegate its rendering functionality to injected renderers, and the
|
||||
* renderers can manage the associated state needed for the specific formats
|
||||
* in addition to the heuristics for producing it.
|
||||
*/
|
||||
class TESS_API TessResultRenderer {
  public:
    // Closes the output stream (stdout is left open) and destroys any
    // chained renderers acquired through insert().
    virtual ~TessResultRenderer();

    // Takes ownership of pointer so must be new'd instance.
    // Renderers aren't ordered, but appends the sequences of next parameter
    // and existing next(). The renderers should be unique across both lists.
    void insert(TessResultRenderer* next);

    // Returns the next renderer or NULL.
    TessResultRenderer* next() { return next_; }

    /**
     * Starts a new document with the given title.
     * This clears the contents of the output data.
     * Title should use UTF-8 encoding.
     */
    bool BeginDocument(const char* title);

    /**
     * Adds the recognized text from the source image to the current document.
     * Invalid if BeginDocument not yet called.
     *
     * Note that this API is a bit weird but is designed to fit into the
     * current TessBaseAPI implementation where the api has lots of state
     * information that we might want to add in.
     */
    bool AddImage(TessBaseAPI * api, const char * jpgdata, int len);

    /**
     * Finishes the document and finalizes the output data
     * Invalid if BeginDocument not yet called.
     */
    bool EndDocument();

    // Accessors for the output file extension and the document title.
    const char* file_extension() const { return file_extension_; }
    const char* title() const { return title_.c_str(); }

    /**
     * Returns the index of the last image given to AddImage
     * (i.e. images are incremented whether the image succeeded or not)
     *
     * This is always defined. It means either the number of the
     * current image, the last image ended, or in the completed document
     * depending on when in the document lifecycle you are looking at it.
     * Will return -1 if a document was never started.
     */
    int imagenum() const { return imagenum_; }

  protected:
    /**
     * Called by concrete classes.
     *
     * outputbase is the name of the output file excluding
     * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
     *
     * extension indicates the file extension to be used for output
     * files. For example "pdf" will produce a .pdf file, and "hocr"
     * will produce .hocr files.
     */
    TessResultRenderer(const char *outputbase,
                       const char* extension);

    // Hook for specialized handling in BeginDocument()
    virtual bool BeginDocumentHandler();

    // This must be overridden to render the OCR'd results
    virtual bool AddImageHandler(TessBaseAPI* api) = 0;

    // Variant that additionally receives the original compressed image
    // data for the page (used by renderers that embed the image).
    virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) = 0;

    // Hook for specialized handling in EndDocument()
    virtual bool EndDocumentHandler();

    // Renderers can call this to append '\0' terminated strings into
    // the output string returned by GetOutput.
    // This method will grow the output buffer if needed.
    void AppendString(const char* s);

    // Renderers can call this to append binary byte sequences into
    // the output string returned by GetOutput. Note that s is not necessarily
    // '\0' terminated (and can contain '\0' within it).
    // This method will grow the output buffer if needed.
    void AppendData(const char* s, int len);

  private:
    const char* file_extension_;  // standard extension for generated output
    STRING title_;                // title of document being rendered
    int imagenum_;                // index of last image added

    FILE* fout_;                  // output file pointer
    TessResultRenderer* next_;    // Can link multiple renderers together
    bool happy_;                  // I get grumpy when the disk fills up, etc.
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into a plain UTF-8 text string
|
||||
*/
|
||||
class TESS_API TessTextRenderer : public TessResultRenderer {
 public:
  explicit TessTextRenderer(const char *outputbase);

 protected:
  // Writes the page's UTF-8 text.
  virtual bool AddImageHandler(TessBaseAPI* api);
  // Image-data variant is not used for plain text output.
  virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) { return false; }
};

/**
 * Renders tesseract output into an hocr text string
 */
class TESS_API TessHOcrRenderer : public TessResultRenderer {
 public:
  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
  explicit TessHOcrRenderer(const char *outputbase);

 protected:
  // Emits the XHTML/hOCR document prologue.
  virtual bool BeginDocumentHandler();
  // Writes one page of hOCR markup.
  virtual bool AddImageHandler(TessBaseAPI* api);
  // Image-data variant is not used for hOCR output.
  virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) { return false; }
  // Emits the closing body/html tags.
  virtual bool EndDocumentHandler();

 private:
  bool font_info_;  // whether to print font information
};

/**
 * Renders Tesseract output into a TSV string
 */
class TESS_API TessTsvRenderer : public TessResultRenderer {
 public:
  explicit TessTsvRenderer(const char* outputbase, bool font_info);
  explicit TessTsvRenderer(const char* outputbase);

 protected:
  // Writes the TSV column-heading row.
  virtual bool BeginDocumentHandler();
  // Writes one page of TSV rows.
  virtual bool AddImageHandler(TessBaseAPI* api);
  // Image-data variant is not used for TSV output.
  virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) { return false; }
  // No trailer is needed for TSV.
  virtual bool EndDocumentHandler();

 private:
  bool font_info_;  // whether to print font information
};
|
||||
|
||||
/**
|
||||
* Renders tesseract output into searchable PDF
|
||||
*/
|
||||
class TESS_API TessPDFRenderer : public TessResultRenderer {
 public:
  // datadir is the location of the TESSDATA. We need it because
  // we load a custom PDF font from this location.
  TessPDFRenderer(const char* outputbase, const char* datadir);
  TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly);

 protected:
  // Renders one page, reusing the caller-supplied compressed image data.
  virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len);

  virtual bool BeginDocumentHandler();

  virtual bool AddImageHandler(TessBaseAPI* api);

  virtual bool EndDocumentHandler();

 private:
  // We don't want to have every image in memory at once,
  // so we store some metadata as we go along producing
  // PDFs one page at a time. At the end, that metadata is
  // used to make everything that isn't easily handled in a
  // streaming fashion.
  long int obj_;                     // counter for PDF objects
  GenericVector<long int> offsets_;  // offset of every PDF object in bytes
  GenericVector<long int> pages_;    // object number for every /Page object
  const char *datadir_;              // where to find the custom font
  bool textonly_;                    // skip images if set
  // Bookkeeping only. DIY = Do It Yourself.
  void AppendPDFObjectDIY(size_t objectsize);
  // Bookkeeping + emit data.
  void AppendPDFObject(const char *data);
  // Create the /Contents object for an entire page.
  char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
  // Turn an image into a PDF object. Only transcode if we have to.
  static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
                            char **pdf_object, long int *pdf_object_size);
  static bool imageToPDFObj(const char* jpgdata, int len, long int objnum,
                            char **pdf_object, long int *pdf_object_size);
};
|
||||
|
||||
|
||||
/**
 * Renders tesseract output into UNLV-format text.
 * (NOTE(review): original comment said "plain UTF-8 text string", an apparent
 * copy-paste from TessTextRenderer; the class name indicates UNLV output.)
 */
class TESS_API TessUnlvRenderer : public TessResultRenderer {
 public:
  explicit TessUnlvRenderer(const char *outputbase);

 protected:
  virtual bool AddImageHandler(TessBaseAPI* api);
  // Pre-compressed JPEG input is not supported by this renderer; always fails.
  virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) { return false; }
};
|
||||
|
||||
/**
 * Renders tesseract output into box-file text (one box per recognized unit).
 * (NOTE(review): original comment said "plain UTF-8 text string", an apparent
 * copy-paste from TessTextRenderer; the class name indicates box-text output.)
 */
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
 public:
  explicit TessBoxTextRenderer(const char *outputbase);

 protected:
  virtual bool AddImageHandler(TessBaseAPI* api);
  // Pre-compressed JPEG input is not supported by this renderer; always fails.
  virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) { return false; }
};
|
||||
|
||||
/**
 * Renders tesseract output into an osd text string
 */
class TESS_API TessOsdRenderer : public TessResultRenderer {
 public:
  explicit TessOsdRenderer(const char* outputbase);

 protected:
  virtual bool AddImageHandler(TessBaseAPI* api);
  // Pre-compressed JPEG input is not supported by this renderer; always fails.
  virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) { return false; }
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_API_RENDERER_H_
|
|
@ -0,0 +1,546 @@
|
|||
/**********************************************************************
|
||||
* File: tessedit.cpp (Formerly tessedit.c)
|
||||
* Description: Main program for merge of tess and editor.
|
||||
* Author: Ray Smith
|
||||
* Created: Tue Jan 07 15:21:46 GMT 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// Include automatically generated configuration file if running autoconf
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "allheaders.h"
|
||||
#include "baseapi.h"
|
||||
#include "basedir.h"
|
||||
#include "dict.h"
|
||||
#include "openclwrapper.h"
|
||||
#include "osdetect.h"
|
||||
#include "renderer.h"
|
||||
#include "strngs.h"
|
||||
#include "tprintf.h"
|
||||
#include "StopWatch.h"
|
||||
|
||||
#if defined(HAVE_TIFFIO_H) && defined(_WIN32)
|
||||
|
||||
#include <tiffio.h>
|
||||
|
||||
// libtiff warning hook for Windows console builds: writes
// "<module>: Warning, <formatted message>.\n" to stderr.
static void Win32WarningHandler(const char* module, const char* fmt,
                                va_list ap) {
  if (module != NULL) {
    fprintf(stderr, "%s: ", module);
  }
  fputs("Warning, ", stderr);
  vfprintf(stderr, fmt, ap);
  fputs(".\n", stderr);
}
|
||||
|
||||
#endif /* HAVE_TIFFIO_H && _WIN32 */
|
||||
|
||||
// Prints the tesseract version plus the Leptonica and image-library
// versions, and (when built with USE_OPENCL) enumerates the available
// OpenCL platforms and devices.
void PrintVersionInfo() {
  char* versionStrP;

  printf("tesseract %s\n", tesseract::TessBaseAPI::Version());

  // Leptonica allocates the version strings; release them with lept_free.
  versionStrP = getLeptonicaVersion();
  printf(" %s\n", versionStrP);
  lept_free(versionStrP);

  versionStrP = getImagelibVersions();
  printf(" %s\n", versionStrP);
  lept_free(versionStrP);

#ifdef USE_OPENCL
  // Query up to 4 platforms and up to 2 devices per platform.
  cl_platform_id platform[4];
  cl_uint num_platforms;

  printf(" OpenCL info:\n");
  if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) {
    printf("  Found %u platform(s).\n", num_platforms);
    for (unsigned n = 0; n < num_platforms; n++) {
      char info[256];
      if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) ==
          CL_SUCCESS) {
        printf("  Platform %u name: %s.\n", n + 1, info);
      }
      if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) ==
          CL_SUCCESS) {
        printf("  Version: %s.\n", info);
      }
      cl_device_id devices[2];
      cl_uint num_devices;
      if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices,
                         &num_devices) == CL_SUCCESS) {
        printf("  Found %u device(s).\n", num_devices);
        for (unsigned i = 0; i < num_devices; ++i) {
          if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) ==
              CL_SUCCESS) {
            printf("    Device %u name: %s.\n", i + 1, info);
          }
        }
      }
    }
  }
#endif
}
|
||||
|
||||
// Prints the command-line usage summary; `program` is argv[0].
void PrintUsage(const char* program) {
  static const char* const kUsageFormat =
      "Usage:\n"
      "  %s --help | --help-psm | --help-oem | --version\n"
      "  %s --list-langs [--tessdata-dir PATH]\n"
      "  %s --print-parameters [options...] [configfile...]\n"
      "  %s imagename|stdin outputbase|stdout [options...] [configfile...]\n";
  printf(kUsageFormat, program, program, program, program);
}
|
||||
|
||||
// Prints the description of every page segmentation mode (--psm values).
void PrintHelpForPSM() {
  static const char* const kPsmHelp =
      "Page segmentation modes:\n"
      "  0    Orientation and script detection (OSD) only.\n"
      "  1    Automatic page segmentation with OSD.\n"
      "  2    Automatic page segmentation, but no OSD, or OCR.\n"
      "  3    Fully automatic page segmentation, but no OSD. (Default)\n"
      "  4    Assume a single column of text of variable sizes.\n"
      "  5    Assume a single uniform block of vertically aligned text.\n"
      "  6    Assume a single uniform block of text.\n"
      "  7    Treat the image as a single text line.\n"
      "  8    Treat the image as a single word.\n"
      "  9    Treat the image as a single word in a circle.\n"
      " 10    Treat the image as a single character.\n"
      " 11    Sparse text. Find as much text as possible in no"
      " particular order.\n"
      " 12    Sparse text with OSD.\n"
      " 13    Raw line. Treat the image as a single text line,\n"
      "\t\t\tbypassing hacks that are Tesseract-specific.\n";

  fputs(kPsmHelp, stdout);
}
|
||||
|
||||
// Prints the description of every OCR engine mode (--oem values).
void PrintHelpForOEM() {
  static const char* const kOemHelp =
      "OCR Engine modes:\n"
      "  0    Original Tesseract only.\n"
      "  1    Cube only.\n"
      "  2    Tesseract + cube.\n"
      "  3    Default, based on what is available.\n";

  fputs(kOemHelp, stdout);
}
|
||||
|
||||
// Prints the full help text: usage line, OCR options, PSM/OEM mode
// descriptions, and the single-use options.
void PrintHelpMessage(const char* program) {
  PrintUsage(program);

  static const char* const kOcrOptions =
      "OCR options:\n"
      "  --tessdata-dir PATH   Specify the location of tessdata path.\n"
      "  --user-words PATH     Specify the location of user words file.\n"
      "  --user-patterns PATH  Specify the location of user patterns file.\n"
      "  -l LANG[+LANG]        Specify language(s) used for OCR.\n"
      "  -c VAR=VALUE          Set value for config variables.\n"
      "                        Multiple -c arguments are allowed.\n"
      "  --psm NUM             Specify page segmentation mode.\n"
      "  --oem NUM             Specify OCR Engine mode.\n"
      "NOTE: These options must occur before any configfile.\n";

  // Emitted as "\n" + options + "\n", matching printf("\n%s\n", ...).
  fputs("\n", stdout);
  fputs(kOcrOptions, stdout);
  fputs("\n", stdout);

  PrintHelpForPSM();
  PrintHelpForOEM();

  static const char* const kSingleOptions =
      "Single options:\n"
      "  -h, --help            Show this help message.\n"
      "  --help-psm            Show page segmentation modes.\n"
      "  --help-oem            Show OCR Engine modes.\n"
      "  -v, --version         Show version information.\n"
      "  --list-langs          List available languages for tesseract engine.\n"
      "  --print-parameters    Print tesseract parameters to stdout.\n";

  // Emitted as "\n" + options, matching printf("\n%s", ...).
  fputs("\n", stdout);
  fputs(kSingleOptions, stdout);
}
|
||||
|
||||
// Applies every "-c VAR=VALUE" pair on the command line to the API.
// Exits with an error message if an assignment lacks '='; unknown
// variable names are reported to stderr but do not abort.
void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc,
                            char** argv) {
  // Name and value buffers, both NUL-terminated below.
  // FIX: the value buffer was 255 bytes terminated at index 254, which is
  // inconsistent with the 256-byte name buffer and truncated one character;
  // both buffers now use the same 256/255 pattern.
  char opt1[256], opt2[256];
  for (int i = 0; i < argc; i++) {
    if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
      strncpy(opt1, argv[i + 1], 255);
      opt1[255] = '\0';
      char* p = strchr(opt1, '=');
      if (!p) {
        fprintf(stderr, "Missing = in configvar assignment\n");
        exit(1);
      }
      *p = 0;  // Split: opt1 now holds just the variable name.
      // Copy the value from the original argument (text after '=').
      strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255);
      opt2[255] = '\0';
      ++i;  // Skip the consumed VAR=VALUE argument.

      if (!api->SetVariable(opt1, opt2)) {
        fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
      }
    }
  }
}
|
||||
|
||||
// Prints every language pack available to the engine, one per line,
// then shuts the API down (api->End()).
void PrintLangsList(tesseract::TessBaseAPI* api) {
  GenericVector<STRING> languages;
  api->GetAvailableLanguagesAsVector(&languages);
  printf("List of available languages (%d):\n", languages.size());
  for (int index = 0; index < languages.size(); ++index) {
    STRING& string = languages[index];
    printf("%s\n", string.string());
  }
  api->End();  // Caller is expected to exit after listing.
}
|
||||
|
||||
// Prints the engine banner (version line) via tprintf, which routes
// output through tesseract's debug-print facility.
void PrintBanner() {
  tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
          tesseract::TessBaseAPI::Version());
}
|
||||
|
||||
/**
 * We have 2 possible sources of pagesegmode: a config file and
 * the command line. For backwards compatibility reasons, the
 * default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
 * default for this program is tesseract::PSM_AUTO. We will let
 * the config file take priority, so the command-line default
 * can take priority over the tesseract default, so we use the
 * value from the command line only if the retrieved mode
 * is still tesseract::PSM_SINGLE_BLOCK, indicating no change
 * in any config file. Therefore the only way to force
 * tesseract::PSM_SINGLE_BLOCK is from the command line.
 * It would be simpler if we could set the value before Init,
 * but that doesn't work.
 */
void FixPageSegMode(tesseract::TessBaseAPI* api,
                    tesseract::PageSegMode pagesegmode) {
  // PSM_SINGLE_BLOCK means "no config file changed it" — apply the CLI value.
  if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
    api->SetPageSegMode(pagesegmode);
}
|
||||
|
||||
// NOTE: arg_i is used here to avoid ugly *i so many times in this function
//
// Parses the command line into the output parameters. Calls exit()
// directly for the help/version single-option forms and when no
// outputbase is supplied for an OCR run. On return, *arg_i is the
// index of the first argument not consumed here (start of the
// configfile list passed to Init).
void ParseArgs(const int argc, char** argv, const char** lang,
               const char** image, const char** outputbase,
               const char** datapath, bool* list_langs, bool* print_parameters,
               GenericVector<STRING>* vars_vec,
               GenericVector<STRING>* vars_values, int* arg_i,
               tesseract::PageSegMode* pagesegmode,
               tesseract::OcrEngineMode* enginemode) {
  // No arguments at all: show help and quit.
  if (argc == 1) {
    PrintHelpMessage(argv[0]);
    exit(0);
  }

  // Single-option forms that print something and quit immediately.
  if (argc == 2) {
    if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
      PrintHelpMessage(argv[0]);
      exit(0);
    }
    if ((strcmp(argv[1], "--help-psm") == 0)) {
      PrintHelpForPSM();
      exit(0);
    }
    if ((strcmp(argv[1], "--help-oem") == 0)) {
      PrintHelpForOEM();
      exit(0);
    }
    if ((strcmp(argv[1], "-v") == 0) || (strcmp(argv[1], "--version") == 0)) {
      PrintVersionInfo();
      exit(0);
    }
  }

  bool noocr = false;  // true when no OCR run is requested (list/print modes).
  int i = 1;
  // Scan options until both positionals (image, outputbase) are filled;
  // options may still follow the positionals if they start with '-'.
  while (i < argc && (*outputbase == NULL || argv[i][0] == '-')) {
    if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
      *lang = argv[i + 1];
      ++i;
    }
    else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
      *datapath = argv[i + 1];
      ++i;
    }
    else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
      // Recorded as a config variable; applied during Init.
      vars_vec->push_back("user_words_file");
      vars_values->push_back(argv[i + 1]);
      ++i;
    }
    else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
      vars_vec->push_back("user_patterns_file");
      vars_values->push_back(argv[i + 1]);
      ++i;
    }
    else if (strcmp(argv[i], "--list-langs") == 0) {
      noocr = true;
      *list_langs = true;
    }
    else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) {
      // The parameter -psm is deprecated and was replaced by --psm.
      // It is still supported for compatibility reasons.
      *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
      ++i;
    }
    else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
      *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
      ++i;
    }
    else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
      *enginemode = static_cast<tesseract::OcrEngineMode>(atoi(argv[i + 1]));
      ++i;
    }
    else if (strcmp(argv[i], "--print-parameters") == 0) {
      noocr = true;
      *print_parameters = true;
    }
    else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
      // handled properly after api init (see SetVariablesFromCLArgs)
      ++i;
    }
    else if (*image == NULL) {
      *image = argv[i];  // First positional argument.
    }
    else if (*outputbase == NULL) {
      *outputbase = argv[i];  // Second positional argument.
    }
    ++i;
  }

  *arg_i = i;

  if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
    *list_langs = true;
    noocr = true;
  }

  // An OCR run without an outputbase is a usage error.
  if (*outputbase == NULL && noocr == false) {
    PrintHelpMessage(argv[0]);
    exit(1);
  }
}
|
||||
|
||||
// Builds the list of output renderers selected by the tessedit_create_*
// config variables. The first renderer becomes the root; the rest are
// chained into it via insert() and NULLed in the vector so the
// auto-deleting PointerVector does not double-free them.
void PreloadRenderers(
    tesseract::TessBaseAPI* api,
    tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
    tesseract::PageSegMode pagesegmode, const char* outputbase) {
  if (pagesegmode == tesseract::PSM_OSD_ONLY) {
    // OSD-only runs produce just the orientation/script report.
    renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
  }
  else {
    bool b;
    api->GetBoolVariable("tessedit_create_hocr", &b);
    if (b) {
      bool font_info;
      api->GetBoolVariable("hocr_font_info", &font_info);
      renderers->push_back(
          new tesseract::TessHOcrRenderer(outputbase, font_info));
    }

    api->GetBoolVariable("tessedit_create_tsv", &b);
    if (b) {
      bool font_info;
      // NOTE(review): TSV reuses the hocr_font_info flag — confirm intended.
      api->GetBoolVariable("hocr_font_info", &font_info);
      renderers->push_back(
          new tesseract::TessTsvRenderer(outputbase, font_info));
    }

    api->GetBoolVariable("tessedit_create_pdf", &b);
    if (b) {
      bool textonly;
      api->GetBoolVariable("textonly_pdf", &textonly);
      renderers->push_back(new tesseract::TessPDFRenderer(
          outputbase, api->GetDatapath(), textonly));
    }

    api->GetBoolVariable("tessedit_write_unlv", &b);
    if (b) {
      renderers->push_back(new tesseract::TessUnlvRenderer(outputbase));
    }

    api->GetBoolVariable("tessedit_create_boxfile", &b);
    if (b) {
      renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase));
    }

    // Plain text is the default when no other renderer was requested.
    api->GetBoolVariable("tessedit_create_txt", &b);
    if (b || renderers->empty()) {
      renderers->push_back(new tesseract::TessTextRenderer(outputbase));
    }
  }

  if (!renderers->empty()) {
    // Since the PointerVector auto-deletes, null-out the renderers that are
    // added to the root, and leave the root in the vector.
    for (int r = 1; r < renderers->size(); ++r) {
      (*renderers)[0]->insert((*renderers)[r]);
      (*renderers)[r] = NULL;
    }
  }
}
|
||||
|
||||
/**********************************************************************
 * main()
 *
 * Command-line entry point: parses arguments, initializes the engine,
 * handles the list-langs / print-parameters / OSD-only special modes,
 * then runs OCR over the input and feeds each page to the renderers.
 **********************************************************************/

int main(int argc, char** argv) {
  const char* lang = "osd";
  const char* image = NULL;
  const char* outputbase = NULL;
  const char* datapath = NULL;
  bool list_langs = false;
  bool print_parameters = false;
  int arg_i = 1;
  tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO_OSD;
  tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
  /* main() calls functions like ParseArgs which call exit().
   * This results in memory leaks if vars_vec and vars_values are
   * declared as auto variables (destructor is not called then). */
  static GenericVector<STRING> vars_vec;
  static GenericVector<STRING> vars_values;

#ifdef NDEBUG
  // Disable debugging and informational messages from Leptonica.
  setMsgSeverity(L_SEVERITY_ERROR);
#endif

#if defined(HAVE_TIFFIO_H) && defined(_WIN32)
  /* Show libtiff warnings on console (not in GUI). */
  TIFFSetWarningHandler(Win32WarningHandler);
#endif /* HAVE_TIFFIO_H && _WIN32 */

  ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs,
            &print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode,
            &enginemode);

  // Suppress the banner when output goes to stdout (keeps output clean).
  bool banner = false;
  if (outputbase != NULL && strcmp(outputbase, "-") &&
      strcmp(outputbase, "stdout")) {
    banner = true;
  }

  PERF_COUNT_START("Tesseract:main")

  // Call GlobalDawgCache here to create the global DawgCache object before
  // the TessBaseAPI object. This fixes the order of destructor calls:
  // first TessBaseAPI must be destructed, DawgCache must be the last object.
  tesseract::Dict::GlobalDawgCache();

  // Avoid memory leak caused by auto variable when exit() is called.
  static tesseract::TessBaseAPI api;

  api.SetOutputName(outputbase);

  int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
                             argc - arg_i, &vars_vec, &vars_values, false);
  if (init_failed) {
    fprintf(stderr, "Could not initialize tesseract.\n");
    // NOTE(review): getchar() pauses before exit (likely for debugging a
    // console that closes) — confirm it is intended in release builds.
    getchar();
    return EXIT_FAILURE;
  }

  SetVariablesFromCLArgs(&api, argc, argv);

  if (list_langs) {
    PrintLangsList(&api);
    getchar();
    return EXIT_SUCCESS;
  }

  if (print_parameters) {
    FILE* fout = stdout;
    fprintf(stdout, "Tesseract parameters:\n");
    api.PrintVariables(fout);
    api.End();
    getchar();
    return EXIT_SUCCESS;
  }

  FixPageSegMode(&api, pagesegmode);

  // OSD-only mode: analyze layout/orientation and report, no OCR.
  if (pagesegmode == tesseract::PSM_AUTO_OSD) {
    int ret_val = EXIT_SUCCESS;

    Pix* pixs = pixRead(image);
    if (!pixs) {
      fprintf(stderr, "Cannot open input file: %s\n", image);
      getchar();
      return 2;
    }

    api.SetImage(pixs);

    tesseract::Orientation orientation;
    tesseract::WritingDirection direction;
    tesseract::TextlineOrder order;
    float deskew_angle;

    tesseract::PageIterator* it = api.AnalyseLayout();
    if (it) {
      // Time just the orientation computation.
      StopWatch timer;
      timer.reset();
      it->Orientation(&orientation, &direction, &order, &deskew_angle);

      tprintf(
          "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
          "Deskew angle: %.4f\n time: %.4f\n img: %s",
          orientation, direction, order, deskew_angle, timer.elapsed_s(), image);
      getchar();
    }
    else {
      ret_val = EXIT_FAILURE;
    }

    delete it;

    pixDestroy(&pixs);
    return ret_val;
  }

  // set in_training_mode to true when using one of these configs:
  // ambigs.train, box.train, box.train.stderr, linebox, rebox
  bool b = false;
  bool in_training_mode =
      (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
      (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
      (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);

  // Avoid memory leak caused by auto variable when exit() is called.
  static tesseract::PointerVector<tesseract::TessResultRenderer> renderers;

  if (in_training_mode) {
    // Training modes write their own outputs; keep a NULL placeholder.
    renderers.push_back(NULL);
  }
  else {
    PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
  }

  if (!renderers.empty()) {
    if (banner) PrintBanner();
    // The root renderer fans each page out to the whole chain.
    bool succeed = api.ProcessPages(image, NULL, 0, renderers[0]);
    if (!succeed) {
      fprintf(stderr, "Error during processing.\n");
      return EXIT_FAILURE;
    }
  }

  PERF_COUNT_END

  return EXIT_SUCCESS;
}
|
|
@ -0,0 +1,126 @@
|
|||
/**********************************************************************
|
||||
* File: adaptions.cpp (Formerly adaptions.c)
|
||||
* Description: Functions used to adapt to blobs already confidently
|
||||
* identified
|
||||
* Author: Chris Newton
|
||||
* Created: Thu Oct 7 10:17:28 BST 1993
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable:4244) // Conversion warnings
|
||||
#pragma warning(disable:4305) // int/float warnings
|
||||
#endif
|
||||
|
||||
#ifdef __UNIX__
|
||||
#include <assert.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include "tessbox.h"
|
||||
#include "tessvars.h"
|
||||
#include "memry.h"
|
||||
#include "reject.h"
|
||||
#include "control.h"
|
||||
#include "stopper.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
// Decides whether `word` is a safe sample for classifier adaptation.
// `mode` is a bitmask interpreted via the MODES enum below: bits 0/1
// gate on Tesseract's own adaptability/acceptance flags; the remaining
// bits are veto checks (dictionary membership, spaces, 1/l conflicts,
// dangerous ambiguities). mode == 0 disables adaptation entirely.
BOOL8 Tesseract::word_adaptable( //should we adapt?
                                WERD_RES *word,
                                uinT16 mode) {
  if (tessedit_adaption_debug) {
    tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
          word->best_choice == NULL ? "" :
          word->best_choice->unichar_string().string(),
          word->best_choice->rating(), word->best_choice->certainty());
  }

  BOOL8 status = FALSE;
  BITS16 flags(mode);  // Bit-addressable view of the mode mask.

  enum MODES
  {
    ADAPTABLE_WERD,
    ACCEPTABLE_WERD,
    CHECK_DAWGS,
    CHECK_SPACES,
    CHECK_ONE_ELL_CONFLICT,
    CHECK_AMBIG_WERD
  };

  /*
  0: NO adaption
  */
  if (mode == 0) {
    if (tessedit_adaption_debug) tprintf("adaption disabled\n");
    return FALSE;
  }

  // Positive gates: at least one of these must set status for the
  // veto checks below to matter.
  if (flags.bit(ADAPTABLE_WERD)) {
    status |= word->tess_would_adapt;  // result of Classify::AdaptableWord()
    if (tessedit_adaption_debug && !status) {
      tprintf("tess_would_adapt bit is false\n");
    }
  }

  if (flags.bit(ACCEPTABLE_WERD)) {
    status |= word->tess_accepted;
    if (tessedit_adaption_debug && !status) {
      tprintf("tess_accepted bit is false\n");
    }
  }

  if (!status) {                  // If not set then
    return FALSE;                 // ignore other checks
  }

  // Veto: require the word to come from one of the trusted permuters.
  if (flags.bit(CHECK_DAWGS) &&
    (word->best_choice->permuter() != SYSTEM_DAWG_PERM) &&
    (word->best_choice->permuter() != FREQ_DAWG_PERM) &&
    (word->best_choice->permuter() != USER_DAWG_PERM) &&
    (word->best_choice->permuter() != NUMBER_PERM)) {
    if (tessedit_adaption_debug) tprintf("word not in dawgs\n");
    return FALSE;
  }

  // Veto: words with unresolved one/ell/I confusion are unreliable samples.
  if (flags.bit(CHECK_ONE_ELL_CONFLICT) && one_ell_conflict(word, FALSE)) {
    if (tessedit_adaption_debug) tprintf("word has ell conflict\n");
    return FALSE;
  }

  // Veto: embedded spaces mean uncertain segmentation.
  if (flags.bit(CHECK_SPACES) &&
    (strchr(word->best_choice->unichar_string().string(), ' ') != NULL)) {
    if (tessedit_adaption_debug) tprintf("word contains spaces\n");
    return FALSE;
  }

  // Veto: dangerously ambiguous words must not train the adaptive classifier.
  if (flags.bit(CHECK_AMBIG_WERD) &&
      word->best_choice->dangerous_ambig_found()) {
    if (tessedit_adaption_debug) tprintf("word is ambiguous\n");
    return FALSE;
  }

  if (tessedit_adaption_debug) {
    tprintf("returning status %d\n", status);
  }
  return status;
}
|
||||
|
||||
} // namespace tesseract
|
|
@ -0,0 +1,814 @@
|
|||
/**********************************************************************
|
||||
* File: applybox.cpp (Formerly applybox.c)
|
||||
* Description: Re segment rows according to box file data
|
||||
* Author: Phil Cheatle
|
||||
* Created: Wed Nov 24 09:11:23 GMT 1993
|
||||
*
|
||||
* (C) Copyright 1993, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable:4244) // Conversion warnings
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#ifdef __UNIX__
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#endif
|
||||
#include "allheaders.h"
|
||||
#include "boxread.h"
|
||||
#include "chopper.h"
|
||||
#include "pageres.h"
|
||||
#include "unichar.h"
|
||||
#include "unicharset.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "genericvector.h"
|
||||
|
||||
/** Max number of blobs to classify together in FindSegmentation. */
const int kMaxGroupSize = 4;
/// Max fraction of median allowed as deviation in xheight before switching
/// to median.
const double kMaxXHeightDeviationFraction = 0.125;
|
||||
|
||||
/**
|
||||
* The box file is assumed to contain box definitions, one per line, of the
|
||||
* following format for blob-level boxes:
|
||||
* @verbatim
|
||||
* <UTF8 str> <left> <bottom> <right> <top> <page id>
|
||||
* @endverbatim
|
||||
* and for word/line-level boxes:
|
||||
* @verbatim
|
||||
* WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
|
||||
* @endverbatim
|
||||
* NOTES:
|
||||
* The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT.
|
||||
*
|
||||
* <page id> is 0-based, and the page number is used for multipage input (tiff).
|
||||
*
|
||||
* In the blob-level form, each line represents a recognizable unit, which may
|
||||
* be several UTF-8 bytes, but there is a bounding box around each recognizable
|
||||
* unit, and no classifier is needed to train in this mode (bootstrapping.)
|
||||
*
|
||||
* In the word/line-level form, the line begins with the literal "WordStr", and
|
||||
* the bounding box bounds either a whole line or a whole word. The recognizable
|
||||
* units in the word/line are listed after the # at the end of the line and
|
||||
* are space delimited, ignoring any original spaces on the line.
|
||||
* Eg.
|
||||
* @verbatim
|
||||
* word -> #w o r d
|
||||
* multi word line -> #m u l t i w o r d l i n e
|
||||
* @endverbatim
|
||||
* The recognizable units must be space-delimited in order to allow multiple
|
||||
* unicodes to be used for a single recognizable unit, eg Hindi.
|
||||
*
|
||||
* In this mode, the classifier must have been pre-trained with the desired
|
||||
* character set, or it will not be able to find the character segmentations.
|
||||
*/
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Wipes the recognized text of every word on the page (all blocks, all
// rows) so boxes can repopulate it from scratch.
static void clear_any_old_text(BLOCK_LIST *block_list) {
  BLOCK_IT block_it(block_list);
  for (block_it.mark_cycle_pt();
       !block_it.cycled_list(); block_it.forward()) {
    ROW_IT row_it(block_it.data()->row_list());
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      WERD_IT word_it(row_it.data()->word_list());
      for (word_it.mark_cycle_pt();
           !word_it.cycled_list(); word_it.forward()) {
        word_it.data()->set_text("");  // Empty string, not NULL.
      }
    }
  }
}
|
||||
|
||||
// Applies the box file based on the image name fname, and resegments
|
||||
// the words in the block_list (page), with:
|
||||
// blob-mode: one blob per line in the box file, words as input.
|
||||
// word/line-mode: one blob per space-delimited unit after the #, and one word
|
||||
// per line in the box file. (See comment above for box file format.)
|
||||
// If find_segmentation is true, (word/line mode) then the classifier is used
|
||||
// to re-segment words/lines to match the space-delimited truth string for
|
||||
// each box. In this case, the input box may be for a word or even a whole
|
||||
// text line, and the output words will contain multiple blobs corresponding
|
||||
// to the space-delimited input string.
|
||||
// With find_segmentation false, no classifier is needed, but the chopper
|
||||
// can still be used to correctly segment touching characters with the help
|
||||
// of the input boxes.
|
||||
// In the returned PAGE_RES, the WERD_RES are setup as they would be returned
|
||||
// from normal classification, ie. with a word, chopped_word, rebuild_word,
|
||||
// seam_array, denorm, box_word, and best_state, but NO best_choice or
|
||||
// raw_choice, as they would require a UNICHARSET, which we aim to avoid.
|
||||
// Instead, the correct_text member of WERD_RES is set, and this may be later
|
||||
// converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords
|
||||
// is not required before calling ApplyBoxTraining.
|
||||
PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname,
|
||||
bool find_segmentation,
|
||||
BLOCK_LIST *block_list) {
|
||||
GenericVector<TBOX> boxes;
|
||||
GenericVector<STRING> texts, full_texts;
|
||||
if (!ReadAllBoxes(applybox_page, true, fname, &boxes, &texts, &full_texts,
|
||||
NULL)) {
|
||||
return NULL; // Can't do it.
|
||||
}
|
||||
|
||||
int box_count = boxes.size();
|
||||
int box_failures = 0;
|
||||
// Add an empty everything to the end.
|
||||
boxes.push_back(TBOX());
|
||||
texts.push_back(STRING());
|
||||
full_texts.push_back(STRING());
|
||||
|
||||
// In word mode, we use the boxes to make a word for each box, but
|
||||
// in blob mode we use the existing words and maximally chop them first.
|
||||
PAGE_RES* page_res = find_segmentation ?
|
||||
NULL : SetupApplyBoxes(boxes, block_list);
|
||||
clear_any_old_text(block_list);
|
||||
|
||||
for (int i = 0; i < boxes.size() - 1; i++) {
|
||||
bool foundit = false;
|
||||
if (page_res != NULL) {
|
||||
if (i == 0) {
|
||||
foundit = ResegmentCharBox(page_res, NULL, boxes[i], boxes[i + 1],
|
||||
full_texts[i].string());
|
||||
}
|
||||
else {
|
||||
foundit = ResegmentCharBox(page_res, &boxes[i - 1], boxes[i],
|
||||
boxes[i + 1], full_texts[i].string());
|
||||
}
|
||||
}
|
||||
else {
|
||||
foundit = ResegmentWordBox(block_list, boxes[i], boxes[i + 1],
|
||||
texts[i].string());
|
||||
}
|
||||
if (!foundit) {
|
||||
box_failures++;
|
||||
ReportFailedBox(i, boxes[i], texts[i].string(),
|
||||
"FAILURE! Couldn't find a matching blob");
|
||||
}
|
||||
}
|
||||
|
||||
if (page_res == NULL) {
|
||||
// In word/line mode, we now maximally chop all the words and resegment
|
||||
// them with the classifier.
|
||||
page_res = SetupApplyBoxes(boxes, block_list);
|
||||
ReSegmentByClassification(page_res);
|
||||
}
|
||||
if (applybox_debug > 0) {
|
||||
tprintf("APPLY_BOXES:\n");
|
||||
tprintf(" Boxes read from boxfile: %6d\n", box_count);
|
||||
if (box_failures > 0)
|
||||
tprintf(" Boxes failed resegmentation: %6d\n", box_failures);
|
||||
}
|
||||
TidyUp(page_res);
|
||||
return page_res;
|
||||
}
|
||||
|
||||
// Helper computes median xheight in the image.
|
||||
static double MedianXHeight(BLOCK_LIST *block_list) {
|
||||
BLOCK_IT block_it(block_list);
|
||||
STATS xheights(0, block_it.data()->bounding_box().height());
|
||||
for (block_it.mark_cycle_pt();
|
||||
!block_it.cycled_list(); block_it.forward()) {
|
||||
ROW_IT row_it(block_it.data()->row_list());
|
||||
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
||||
xheights.add(IntCastRounded(row_it.data()->x_height()), 1);
|
||||
}
|
||||
}
|
||||
return xheights.median();
|
||||
}
|
||||
|
||||
/// Any row xheight that is significantly different from the median is set
/// to the median.
void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
  double median_xheight = MedianXHeight(block_list);
  // Rows further than this fraction of the median are treated as outliers.
  double max_deviation = kMaxXHeightDeviationFraction * median_xheight;
  // Clamp the x-height of every outlier row to the page median.
  BLOCK_IT b_it(block_list);
  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
    BLOCK* block = b_it.data();
    ROW_IT r_it(block->row_list());
    for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
      ROW* row = r_it.data();
      float diff = fabs(row->x_height() - median_xheight);
      if (diff > max_deviation) {
        if (applybox_debug) {
          tprintf("row xheight=%g, but median xheight = %g\n",
                  row->x_height(), median_xheight);
        }
        row->set_x_height(static_cast<float>(median_xheight));
      }
    }
  }
}
|
||||
|
||||
/// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
/// All fuzzy spaces are removed, and all the words are maximally chopped.
///
/// @param boxes       box-file boxes, used by MaximallyChopWord to guide chops.
/// @param block_list  the page structure; empty words are deleted from it.
/// @return a newly allocated PAGE_RES (caller owns it).
PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
                                     BLOCK_LIST *block_list) {
  PreenXHeights(block_list);
  // Strip all fuzzy space markers to simplify the PAGE_RES.
  BLOCK_IT b_it(block_list);
  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
    BLOCK* block = b_it.data();
    ROW_IT r_it(block->row_list());
    for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
      ROW* row = r_it.data();
      WERD_IT w_it(row->word_list());
      for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
        WERD* word = w_it.data();
        if (word->cblob_list()->empty()) {
          // A word with no blobs carries nothing to label; drop it.
          delete w_it.extract();
        }
        else {
          word->set_flag(W_FUZZY_SP, false);
          word->set_flag(W_FUZZY_NON, false);
        }
      }
    }
  }
  PAGE_RES* page_res = new PAGE_RES(false, block_list, NULL);
  PAGE_RES_IT pr_it(page_res);
  WERD_RES* word_res;
  // Chop every word as finely as the chopper allows before box application.
  while ((word_res = pr_it.word()) != NULL) {
    MaximallyChopWord(boxes, pr_it.block()->block,
                      pr_it.row()->row, word_res);
    pr_it.forward();
  }
  return page_res;
}
|
||||
|
||||
/// Tests the chopper by exhaustively running chop_one_blob.
/// The word_res will contain filled chopped_word, seam_array, denorm,
/// box_word and best_state for the maximally chopped word.
void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
                                  BLOCK* block, ROW* row,
                                  WERD_RES* word_res) {
  if (!word_res->SetupForRecognition(unicharset, this, BestPix(),
                                     tessedit_ocr_engine_mode, NULL,
                                     classify_bln_numeric_mode,
                                     textord_use_cjk_fp_model,
                                     poly_allow_detailed_fx,
                                     row, block)) {
    // Setup failed; still populate rebuild_word so downstream code has a
    // consistent (if unchopped) word to work with.
    word_res->CloneChoppedToRebuild();
    return;
  }
  if (chop_debug) {
    tprintf("Maximally chopping word at:");
    word_res->word->bounding_box().print();
  }
  GenericVector<BLOB_CHOICE*> blob_choices;
  ASSERT_HOST(!word_res->chopped_word->blobs.empty());
  float rating = static_cast<float>(MAX_INT8);
  for (int i = 0; i < word_res->chopped_word->NumBlobs(); ++i) {
    // The rating and certainty are not quite arbitrary. Since
    // select_blob_to_chop uses the worst certainty to choose, they all have
    // to be different, so starting with MAX_INT8, subtract 1/8 for each blob
    // in here, and then divide by e each time they are chopped, which
    // should guarantee a set of unequal values for the whole tree of blobs
    // produced, however much chopping is required. The chops are thus only
    // limited by the ability of the chopper to find suitable chop points,
    // and not by the value of the certainties.
    BLOB_CHOICE* choice =
        new BLOB_CHOICE(0, rating, -rating, -1, 0.0f, 0.0f, 0.0f, BCC_FAKE);
    blob_choices.push_back(choice);
    rating -= 0.125f;
  }
  const double e = exp(1.0);  // The base of natural logs.
  int blob_number;
  int right_chop_index = 0;
  if (!assume_fixed_pitch_char_segment) {
    // We only chop if the language is not fixed pitch like CJK.
    SEAM* seam = NULL;
    // Keep chopping until the chopper can find no further split point.
    while ((seam = chop_one_blob(boxes, blob_choices, word_res,
                                 &blob_number)) != NULL) {
      word_res->InsertSeam(blob_number, seam);
      // The chopped blob's left half keeps the slot; decay its rating by e
      // so every node of the chop tree gets a distinct value.
      BLOB_CHOICE* left_choice = blob_choices[blob_number];
      rating = left_choice->rating() / e;
      left_choice->set_rating(rating);
      left_choice->set_certainty(-rating);
      // combine confidence w/ serial #
      BLOB_CHOICE* right_choice = new BLOB_CHOICE(++right_chop_index,
                                                  rating - 0.125f, -rating, -1,
                                                  0.0f, 0.0f, 0.0f, BCC_FAKE);
      blob_choices.insert(right_choice, blob_number + 1);
    }
  }
  word_res->CloneChoppedToRebuild();
  // Install the fake per-blob choices as the word's classification result.
  word_res->FakeClassifyWord(blob_choices.size(), &blob_choices[0]);
}
|
||||
|
||||
/// Helper to compute the dispute resolution metric.
|
||||
/// Disputed blob resolution. The aim is to give the blob to the most
|
||||
/// appropriate boxfile box. Most of the time it is obvious, but if
|
||||
/// two boxfile boxes overlap significantly it is not. If a small boxfile
|
||||
/// box takes most of the blob, and a large boxfile box does too, then
|
||||
/// we want the small boxfile box to get it, but if the small box
|
||||
/// is much smaller than the blob, we don't want it to get it.
|
||||
/// Details of the disputed blob resolution:
|
||||
/// Given a box with area A, and a blob with area B, with overlap area C,
|
||||
/// then the miss metric is (A-C)(B-C)/(AB) and the box with minimum
|
||||
/// miss metric gets the blob.
|
||||
static double BoxMissMetric(const TBOX& box1, const TBOX& box2) {
|
||||
int overlap_area = box1.intersection(box2).area();
|
||||
double miss_metric = box1.area() - overlap_area;
|
||||
miss_metric /= box1.area();
|
||||
miss_metric *= box2.area() - overlap_area;
|
||||
miss_metric /= box2.area();
|
||||
return miss_metric;
|
||||
}
|
||||
|
||||
/// Gather consecutive blobs that match the given box into the best_state
/// and corresponding correct_text.
///
/// Fights over which box owns which blobs are settled by pre-chopping and
/// applying the blobs to box or next_box with the least non-overlap.
/// @return false if the box was in error, which can only be caused by
/// failing to find an appropriate blob for a box.
///
/// This means that occasionally, blobs may be incorrectly segmented if the
/// chopper fails to find a suitable chop point.
bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
                                 const TBOX& box, const TBOX& next_box,
                                 const char* correct_text) {
  if (applybox_debug > 1) {
    tprintf("\nAPPLY_BOX: in ResegmentCharBox() for %s\n", correct_text);
  }
  PAGE_RES_IT page_res_it(page_res);
  WERD_RES* word_res;
  for (word_res = page_res_it.word(); word_res != NULL;
       word_res = page_res_it.forward()) {
    // Only words that substantially overlap the box can contain its blobs.
    if (!word_res->box_word->bounding_box().major_overlap(box))
      continue;
    if (applybox_debug > 1) {
      tprintf("Checking word box:");
      word_res->box_word->bounding_box().print();
    }
    int word_len = word_res->box_word->length();
    for (int i = 0; i < word_len; ++i) {
      TBOX char_box = TBOX();
      int blob_count = 0;
      // Greedily accumulate consecutive unclaimed blobs that fit this box
      // better than the next box.
      for (blob_count = 0; i + blob_count < word_len; ++blob_count) {
        TBOX blob_box = word_res->box_word->BlobBox(i + blob_count);
        if (!blob_box.major_overlap(box))
          break;
        if (word_res->correct_text[i + blob_count].length() > 0)
          break;  // Blob is claimed already.
        double current_box_miss_metric = BoxMissMetric(blob_box, box);
        double next_box_miss_metric = BoxMissMetric(blob_box, next_box);
        if (applybox_debug > 2) {
          tprintf("Checking blob:");
          blob_box.print();
          tprintf("Current miss metric = %g, next = %g\n",
                  current_box_miss_metric, next_box_miss_metric);
        }
        if (current_box_miss_metric > next_box_miss_metric)
          break;  // Blob is a better match for next box.
        char_box += blob_box;
      }
      if (blob_count > 0) {
        if (applybox_debug > 1) {
          tprintf("Index [%d, %d) seem good.\n", i, i + blob_count);
        }
        // Reject if the gathered blobs don't closely match the box and the
        // box nearly touches/overlaps a neighbour — the blobs most likely
        // belong partly to that neighbour.
        if (!char_box.almost_equal(box, 3) &&
            (box.x_gap(next_box) < -3 ||
             (prev_box != NULL && prev_box->x_gap(box) < -3))) {
          return false;
        }
        // We refine just the box_word, best_state and correct_text here.
        // The rebuild_word is made in TidyUp.
        // blob_count blobs are put together to match the box. Merge the
        // box_word boxes, save the blob_count in the state and the text.
        word_res->box_word->MergeBoxes(i, i + blob_count);
        word_res->best_state[i] = blob_count;
        word_res->correct_text[i] = correct_text;
        if (applybox_debug > 2) {
          tprintf("%d Blobs match: blob box:", blob_count);
          word_res->box_word->BlobBox(i).print();
          tprintf("Matches box:");
          box.print();
          tprintf("With next box:");
          next_box.print();
        }
        // Eliminated best_state and correct_text entries for the consumed
        // blobs.
        for (int j = 1; j < blob_count; ++j) {
          word_res->best_state.remove(i + 1);
          word_res->correct_text.remove(i + 1);
        }
        // Assume that no box spans multiple source words, so we are done with
        // this box.
        if (applybox_debug > 1) {
          tprintf("Best state = ");
          for (int j = 0; j < word_res->best_state.size(); ++j) {
            tprintf("%d ", word_res->best_state[j]);
          }
          tprintf("\n");
          tprintf("Correct text = [[ ");
          for (int j = 0; j < word_res->correct_text.size(); ++j) {
            tprintf("%s ", word_res->correct_text[j].string());
          }
          tprintf("]]\n");
        }
        return true;
      }
    }
  }
  if (applybox_debug > 0) {
    tprintf("FAIL!\n");
  }
  return false;  // Failure.
}
|
||||
|
||||
/// Consume all source blobs that strongly overlap the given box,
/// putting them into a new word, with the correct_text label.
/// Fights over which box owns which blobs are settled by
/// applying the blobs to box or next_box with the least non-overlap.
/// @return false if the box was in error, which can only be caused by
/// failing to find an overlapping blob for a box.
bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
                                 const TBOX& box, const TBOX& next_box,
                                 const char* correct_text) {
  if (applybox_debug > 1) {
    tprintf("\nAPPLY_BOX: in ResegmentWordBox() for %s\n", correct_text);
  }
  // Lazily created on the first matching blob; doubles as the success flag.
  WERD* new_word = NULL;
  BLOCK_IT b_it(block_list);
  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
    BLOCK* block = b_it.data();
    if (!box.major_overlap(block->bounding_box()))
      continue;
    ROW_IT r_it(block->row_list());
    for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
      ROW* row = r_it.data();
      if (!box.major_overlap(row->bounding_box()))
        continue;
      WERD_IT w_it(row->word_list());
      for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
        WERD* word = w_it.data();
        if (applybox_debug > 2) {
          tprintf("Checking word:");
          word->bounding_box().print();
        }
        if (word->text() != NULL && word->text()[0] != '\0')
          continue;  // Ignore words that are already done.
        if (!box.major_overlap(word->bounding_box()))
          continue;
        C_BLOB_IT blob_it(word->cblob_list());
        for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
             blob_it.forward()) {
          C_BLOB* blob = blob_it.data();
          TBOX blob_box = blob->bounding_box();
          if (!blob_box.major_overlap(box))
            continue;
          double current_box_miss_metric = BoxMissMetric(blob_box, box);
          double next_box_miss_metric = BoxMissMetric(blob_box, next_box);
          if (applybox_debug > 2) {
            tprintf("Checking blob:");
            blob_box.print();
            tprintf("Current miss metric = %g, next = %g\n",
                    current_box_miss_metric, next_box_miss_metric);
          }
          if (current_box_miss_metric > next_box_miss_metric)
            continue;  // Blob is a better match for next box.
          if (applybox_debug > 2) {
            tprintf("Blob match: blob:");
            blob_box.print();
            tprintf("Matches box:");
            box.print();
            tprintf("With next box:");
            next_box.print();
          }
          if (new_word == NULL) {
            // Make a new word with a single blob.
            new_word = word->shallow_copy();
            new_word->set_text(correct_text);
            w_it.add_to_end(new_word);
          }
          // Move (not copy) the blob out of the source word into the new one.
          C_BLOB_IT new_blob_it(new_word->cblob_list());
          new_blob_it.add_to_end(blob_it.extract());
        }
      }
    }
  }
  if (new_word == NULL && applybox_debug > 0) tprintf("FAIL!\n");
  return new_word != NULL;
}
|
||||
|
||||
/// Resegments the words by running the classifier in an attempt to find the
/// correct segmentation that produces the required string.
/// Words whose truth text cannot be mapped to unichar ids, or for which no
/// matching segmentation is found, are deleted from the page.
void Tesseract::ReSegmentByClassification(PAGE_RES* page_res) {
  PAGE_RES_IT pr_it(page_res);
  WERD_RES* word_res;
  for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) {
    WERD* word = word_res->word;
    if (word->text() == NULL || word->text()[0] == '\0')
      continue;  // Ignore words that have no text.
    // Convert the correct text to a vector of UNICHAR_ID
    GenericVector<UNICHAR_ID> target_text;
    if (!ConvertStringToUnichars(word->text(), &target_text)) {
      tprintf("APPLY_BOX: FAILURE: can't find class_id for '%s'\n",
              word->text());
      // Note: the loop increment still runs pr_it.forward() after deletion.
      pr_it.DeleteCurrentWord();
      continue;
    }
    if (!FindSegmentation(target_text, word_res)) {
      tprintf("APPLY_BOX: FAILURE: can't find segmentation for '%s'\n",
              word->text());
      pr_it.DeleteCurrentWord();
      continue;
    }
  }
}
|
||||
|
||||
/// Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID.
|
||||
/// @return false if an invalid UNICHAR_ID is encountered.
|
||||
bool Tesseract::ConvertStringToUnichars(const char* utf8,
|
||||
GenericVector<UNICHAR_ID>* class_ids) {
|
||||
for (int step = 0; *utf8 != '\0'; utf8 += step) {
|
||||
const char* next_space = strchr(utf8, ' ');
|
||||
if (next_space == NULL)
|
||||
next_space = utf8 + strlen(utf8);
|
||||
step = next_space - utf8;
|
||||
UNICHAR_ID class_id = unicharset.unichar_to_id(utf8, step);
|
||||
if (class_id == INVALID_UNICHAR_ID) {
|
||||
return false;
|
||||
}
|
||||
while (utf8[step] == ' ')
|
||||
++step;
|
||||
class_ids->push_back(class_id);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Resegments the word to achieve the target_text from the classifier.
/// Returns false if the re-segmentation fails.
/// Uses brute-force combination of up to #kMaxGroupSize adjacent blobs, and
/// applies a full search on the classifier results to find the best classified
/// segmentation. As a compromise to obtain better recall, 1-1 ambiguity
/// substitutions ARE used.
bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
                                 WERD_RES* word_res) {
  // Classify all required combinations of blobs and save results in choices.
  int word_length = word_res->box_word->length();
  // choices[i][j-1] holds the classification of blobs [i, i+j).
  // Owned here; freed below after the search.
  GenericVector<BLOB_CHOICE_LIST*>* choices =
      new GenericVector<BLOB_CHOICE_LIST*>[word_length];
  for (int i = 0; i < word_length; ++i) {
    for (int j = 1; j <= kMaxGroupSize && i + j <= word_length; ++j) {
      BLOB_CHOICE_LIST* match_result = classify_piece(
          word_res->seam_array, i, i + j - 1, "Applybox",
          word_res->chopped_word, word_res->blamer_bundle);
      if (applybox_debug > 2) {
        tprintf("%d+%d:", i, j);
        print_ratings_list("Segment:", match_result, unicharset);
      }
      choices[i].push_back(match_result);
    }
  }
  // Search the segmentation graph for the target text. Must be an exact
  // match. Using wildcards makes it difficult to find the correct
  // segmentation even when it is there.
  word_res->best_state.clear();
  GenericVector<int> search_segmentation;
  float best_rating = 0.0f;
  SearchForText(choices, 0, word_length, target_text, 0, 0.0f,
                &search_segmentation, &best_rating, &word_res->best_state);
  for (int i = 0; i < word_length; ++i)
    choices[i].delete_data_pointers();
  delete[] choices;
  if (word_res->best_state.empty()) {
    // Build the original segmentation and if it is the same length as the
    // truth, assume it will do.
    int blob_count = 1;
    for (int s = 0; s < word_res->seam_array.size(); ++s) {
      SEAM* seam = word_res->seam_array[s];
      if (!seam->HasAnySplits()) {
        // A splitless seam marks an original blob boundary.
        word_res->best_state.push_back(blob_count);
        blob_count = 1;
      }
      else {
        ++blob_count;
      }
    }
    word_res->best_state.push_back(blob_count);
    if (word_res->best_state.size() != target_text.size()) {
      word_res->best_state.clear();  // No good. Original segmentation bad size.
      return false;
    }
  }
  // Record the truth text, one entry per segmented character.
  word_res->correct_text.clear();
  for (int i = 0; i < target_text.size(); ++i) {
    word_res->correct_text.push_back(
        STRING(unicharset.id_to_unichar(target_text[i])));
  }
  return true;
}
|
||||
|
||||
/// Recursive helper to find a match to the target_text (from text_index
/// position) in the choices (from choices_pos position).
/// @param choices is an array of GenericVectors, of length choices_length,
/// with each element representing a starting position in the word, and the
/// #GenericVector holding classification results for a sequence of consecutive
/// blobs, with index 0 being a single blob, index 1 being 2 blobs etc.
/// @param choices_pos  current starting blob position in the word.
/// @param choices_length  total number of blob positions.
/// @param target_text  truth string as unichar ids.
/// @param text_index  index of the next truth character to match.
/// @param rating  accumulated rating of the partial segmentation so far.
/// @param segmentation  working partial segmentation (blob counts).
/// @param best_rating  in/out: rating of the best complete match found.
/// @param best_segmentation  in/out: best complete segmentation found.
void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
                              int choices_pos, int choices_length,
                              const GenericVector<UNICHAR_ID>& target_text,
                              int text_index,
                              float rating, GenericVector<int>* segmentation,
                              float* best_rating,
                              GenericVector<int>* best_segmentation) {
  const UnicharAmbigsVector& table = getDict().getUnicharAmbigs().dang_ambigs();
  // Try every group length available at this position.
  for (int length = 1; length <= choices[choices_pos].size(); ++length) {
    // Rating of matching choice or worst choice if no match.
    float choice_rating = 0.0f;
    // Find the corresponding best BLOB_CHOICE.
    BLOB_CHOICE_IT choice_it(choices[choices_pos][length - 1]);
    for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
         choice_it.forward()) {
      BLOB_CHOICE* choice = choice_it.data();
      choice_rating = choice->rating();
      UNICHAR_ID class_id = choice->unichar_id();
      if (class_id == target_text[text_index]) {
        break;
      }
      // Search ambigs table.
      if (class_id < table.size() && table[class_id] != NULL) {
        AmbigSpec_IT spec_it(table[class_id]);
        for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();
             spec_it.forward()) {
          const AmbigSpec *ambig_spec = spec_it.data();
          // We'll only do 1-1.
          if (ambig_spec->wrong_ngram[1] == INVALID_UNICHAR_ID &&
              ambig_spec->correct_ngram_id == target_text[text_index])
            break;
        }
        if (!spec_it.cycled_list())
          break;  // Found an ambig.
      }
    }
    if (choice_it.cycled_list())
      continue;  // No match.
    segmentation->push_back(length);
    if (choices_pos + length == choices_length &&
        text_index + 1 == target_text.size()) {
      // This is a complete match. If the rating is good record a new best.
      if (applybox_debug > 2) {
        tprintf("Complete match, rating = %g, best=%g, seglength=%d, best=%d\n",
                rating + choice_rating, *best_rating, segmentation->size(),
                best_segmentation->size());
      }
      if (best_segmentation->empty() || rating + choice_rating < *best_rating) {
        *best_segmentation = *segmentation;
        *best_rating = rating + choice_rating;
      }
    }
    else if (choices_pos + length < choices_length &&
             text_index + 1 < target_text.size()) {
      if (applybox_debug > 3) {
        tprintf("Match found for %d=%s:%s, at %d+%d, recursing...\n",
                target_text[text_index],
                unicharset.id_to_unichar(target_text[text_index]),
                choice_it.data()->unichar_id() == target_text[text_index]
                ? "Match" : "Ambig",
                choices_pos, length);
      }
      SearchForText(choices, choices_pos + length, choices_length, target_text,
                    text_index + 1, rating + choice_rating, segmentation,
                    best_rating, best_segmentation);
      if (applybox_debug > 3) {
        tprintf("End recursion for %d=%s\n", target_text[text_index],
                unicharset.id_to_unichar(target_text[text_index]));
      }
    }
    // Backtrack: undo this length choice before trying the next one.
    segmentation->truncate(segmentation->size() - 1);
  }
}
|
||||
|
||||
/// - Counts up the labelled words and the blobs within.
|
||||
/// - Deletes all unused or emptied words, counting the unused ones.
|
||||
/// - Resets W_BOL and W_EOL flags correctly.
|
||||
/// - Builds the rebuild_word and rebuilds the box_word and the best_choice.
|
||||
void Tesseract::TidyUp(PAGE_RES* page_res) {
|
||||
int ok_blob_count = 0;
|
||||
int bad_blob_count = 0;
|
||||
int ok_word_count = 0;
|
||||
int unlabelled_words = 0;
|
||||
PAGE_RES_IT pr_it(page_res);
|
||||
WERD_RES* word_res;
|
||||
for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) {
|
||||
int ok_in_word = 0;
|
||||
int blob_count = word_res->correct_text.size();
|
||||
WERD_CHOICE* word_choice = new WERD_CHOICE(word_res->uch_set, blob_count);
|
||||
word_choice->set_permuter(TOP_CHOICE_PERM);
|
||||
for (int c = 0; c < blob_count; ++c) {
|
||||
if (word_res->correct_text[c].length() > 0) {
|
||||
++ok_in_word;
|
||||
}
|
||||
// Since we only need a fake word_res->best_choice, the actual
|
||||
// unichar_ids do not matter. Which is fortunate, since TidyUp()
|
||||
// can be called while training Tesseract, at the stage where
|
||||
// unicharset is not meaningful yet.
|
||||
word_choice->append_unichar_id_space_allocated(
|
||||
INVALID_UNICHAR_ID, word_res->best_state[c], 1.0f, -1.0f);
|
||||
}
|
||||
if (ok_in_word > 0) {
|
||||
ok_blob_count += ok_in_word;
|
||||
bad_blob_count += word_res->correct_text.size() - ok_in_word;
|
||||
word_res->LogNewRawChoice(word_choice);
|
||||
word_res->LogNewCookedChoice(1, false, word_choice);
|
||||
}
|
||||
else {
|
||||
++unlabelled_words;
|
||||
if (applybox_debug > 0) {
|
||||
tprintf("APPLY_BOXES: Unlabelled word at :");
|
||||
word_res->word->bounding_box().print();
|
||||
}
|
||||
pr_it.DeleteCurrentWord();
|
||||
delete word_choice;
|
||||
}
|
||||
}
|
||||
pr_it.restart_page();
|
||||
for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) {
|
||||
// Denormalize back to a BoxWord.
|
||||
word_res->RebuildBestState();
|
||||
word_res->SetupBoxWord();
|
||||
word_res->word->set_flag(W_BOL, pr_it.prev_row() != pr_it.row());
|
||||
word_res->word->set_flag(W_EOL, pr_it.next_row() != pr_it.row());
|
||||
}
|
||||
if (applybox_debug > 0) {
|
||||
tprintf(" Found %d good blobs.\n", ok_blob_count);
|
||||
if (bad_blob_count > 0) {
|
||||
tprintf(" Leaving %d unlabelled blobs in %d words.\n",
|
||||
bad_blob_count, ok_word_count);
|
||||
}
|
||||
if (unlabelled_words > 0)
|
||||
tprintf(" %d remaining unlabelled words deleted.\n", unlabelled_words);
|
||||
}
|
||||
}
|
||||
|
||||
/** Logs a bad box by line in the box file and box coords.*/
|
||||
void Tesseract::ReportFailedBox(int boxfile_lineno, TBOX box,
|
||||
const char *box_ch, const char *err_msg) {
|
||||
tprintf("APPLY_BOXES: boxfile line %d/%s ((%d,%d),(%d,%d)): %s\n",
|
||||
boxfile_lineno + 1, box_ch,
|
||||
box.left(), box.bottom(), box.right(), box.top(), err_msg);
|
||||
}
|
||||
|
||||
/** Creates a fake best_choice entry in each WERD_RES with the correct text.*/
|
||||
void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
|
||||
PAGE_RES_IT pr_it(page_res);
|
||||
for (WERD_RES *word_res = pr_it.word(); word_res != NULL;
|
||||
word_res = pr_it.forward()) {
|
||||
WERD_CHOICE* choice = new WERD_CHOICE(word_res->uch_set,
|
||||
word_res->correct_text.size());
|
||||
for (int i = 0; i < word_res->correct_text.size(); ++i) {
|
||||
// The part before the first space is the real ground truth, and the
|
||||
// rest is the bounding box location and page number.
|
||||
GenericVector<STRING> tokens;
|
||||
word_res->correct_text[i].split(' ', &tokens);
|
||||
UNICHAR_ID char_id = unicharset.unichar_to_id(tokens[0].string());
|
||||
choice->append_unichar_id_space_allocated(char_id,
|
||||
word_res->best_state[i],
|
||||
0.0f, 0.0f);
|
||||
}
|
||||
word_res->ClearWordChoices();
|
||||
word_res->LogNewRawChoice(choice);
|
||||
word_res->LogNewCookedChoice(1, false, choice);
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls #LearnWord to extract features for labelled blobs within each word.
|
||||
/// Features are stored in an internal buffer.
|
||||
void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) {
|
||||
PAGE_RES_IT pr_it(page_res);
|
||||
int word_count = 0;
|
||||
for (WERD_RES *word_res = pr_it.word(); word_res != NULL;
|
||||
word_res = pr_it.forward()) {
|
||||
LearnWord(fontname.string(), word_res);
|
||||
++word_count;
|
||||
}
|
||||
tprintf("Generated training data for %d words\n", word_count);
|
||||
}
|
||||
|
||||
|
||||
} // namespace tesseract
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,44 @@
|
|||
/**********************************************************************
|
||||
* File: control.h (Formerly control.h)
|
||||
* Description: Module-independent matcher controller.
|
||||
* Author: Ray Smith
|
||||
* Created: Thu Apr 23 11:09:58 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
/**
|
||||
* @file control.h
|
||||
* Module-independent matcher controller.
|
||||
*/
|
||||
|
||||
#ifndef CONTROL_H
|
||||
#define CONTROL_H
|
||||
|
||||
#include "params.h"
|
||||
#include "ocrblock.h"
|
||||
#include "ratngs.h"
|
||||
#include "statistc.h"
|
||||
#include "pageres.h"
|
||||
|
||||
/** Classification of a word's capitalization / abbreviation pattern. */
enum ACCEPTABLE_WERD_TYPE
{
  AC_UNACCEPTABLE,  ///< Unacceptable word
  AC_LOWER_CASE,    ///< ALL lower case
  AC_UPPER_CASE,    ///< ALL upper case
  AC_INITIAL_CAP,   ///< ALL but initial lc
  AC_LC_ABBREV,     ///< a.b.c.
  AC_UC_ABBREV      ///< A.B.C.
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,440 @@
|
|||
/******************************************************************
|
||||
* File: cube_control.cpp
|
||||
* Description: Tesseract class methods for invoking cube convolutional
|
||||
* neural network word recognizer.
|
||||
* Author: Raquel Romano
|
||||
* Created: September 2009
|
||||
*
|
||||
* (C) Copyright 2009, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
**********************************************************************/
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "allheaders.h"
|
||||
|
||||
#include "cube_object.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tesseract_cube_combiner.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**
|
||||
* @name convert_prob_to_tess_certainty
|
||||
*
|
||||
* Normalize a probability in the range [0.0, 1.0] to a tesseract
|
||||
* certainty in the range [-20.0, 0.0]
|
||||
*/
|
||||
static float convert_prob_to_tess_certainty(float prob) {
|
||||
return (prob - 1.0) * 20.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name char_box_to_tbox
|
||||
*
|
||||
* Create a TBOX from a character bounding box. If nonzero, the
|
||||
* x_offset accounts for any additional padding of the word box that
|
||||
* should be taken into account.
|
||||
*
|
||||
*/
|
||||
TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
|
||||
l_int32 left;
|
||||
l_int32 top;
|
||||
l_int32 width;
|
||||
l_int32 height;
|
||||
l_int32 right;
|
||||
l_int32 bottom;
|
||||
|
||||
boxGetGeometry(char_box, &left, &top, &width, &height);
|
||||
left += word_box.left() - x_offset;
|
||||
right = left + width;
|
||||
top = word_box.bottom() + word_box.height() - top;
|
||||
bottom = top - height;
|
||||
return TBOX(left, bottom, right, top);
|
||||
}
|
||||
|
||||
/**
|
||||
* @name extract_cube_state
|
||||
*
|
||||
* Extract CharSamp objects and character bounding boxes from the
|
||||
* CubeObject's state. The caller should free both structres.
|
||||
*
|
||||
*/
|
||||
bool Tesseract::extract_cube_state(CubeObject* cube_obj,
|
||||
int* num_chars,
|
||||
Boxa** char_boxes,
|
||||
CharSamp*** char_samples) {
|
||||
if (!cube_obj) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (extract_cube_state): Invalid cube object "
|
||||
"passed to extract_cube_state\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Note that the CubeObject accessors return either the deslanted or
|
||||
// regular objects search object or beam search object, whichever
|
||||
// was used in the last call to Recognize()
|
||||
CubeSearchObject* cube_search_obj = cube_obj->SrchObj();
|
||||
if (!cube_search_obj) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
|
||||
"cube's search object in extract_cube_state.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
BeamSearch *beam_search_obj = cube_obj->BeamObj();
|
||||
if (!beam_search_obj) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
|
||||
"cube's beam search object in extract_cube_state.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the character samples and bounding boxes by backtracking
|
||||
// through the beam search path
|
||||
int best_node_index = beam_search_obj->BestPresortedNodeIndex();
|
||||
*char_samples = beam_search_obj->BackTrack(
|
||||
cube_search_obj, best_node_index, num_chars, NULL, char_boxes);
|
||||
if (!*char_samples)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name create_cube_box_word
|
||||
*
|
||||
* Fill the given BoxWord with boxes from character bounding
|
||||
* boxes. The char_boxes have local coordinates w.r.t. the
|
||||
* word bounding box, i.e., the left-most character bbox of each word
|
||||
* has (0,0) left-top coord, but the BoxWord must be defined in page
|
||||
* coordinates.
|
||||
*/
|
||||
bool Tesseract::create_cube_box_word(Boxa *char_boxes,
|
||||
int num_chars,
|
||||
TBOX word_box,
|
||||
BoxWord* box_word) {
|
||||
if (!box_word) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Find the x-coordinate of left-most char_box, which could be
|
||||
// nonzero if the word image was padded before recognition took place.
|
||||
int x_offset = -1;
|
||||
for (int i = 0; i < num_chars; ++i) {
|
||||
Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
|
||||
if (x_offset < 0 || char_box->x < x_offset) {
|
||||
x_offset = char_box->x;
|
||||
}
|
||||
boxDestroy(&char_box);
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_chars; ++i) {
|
||||
Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
|
||||
TBOX tbox = char_box_to_tbox(char_box, word_box, x_offset);
|
||||
boxDestroy(&char_box);
|
||||
box_word->InsertBox(i, tbox);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name init_cube_objects
|
||||
*
|
||||
* Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner.
|
||||
* Returns false if cube context could not be created or if load_combiner is
|
||||
* true, but the combiner could not be loaded.
|
||||
*/
|
||||
bool Tesseract::init_cube_objects(bool load_combiner,
|
||||
TessdataManager *tessdata_manager) {
|
||||
ASSERT_HOST(cube_cntxt_ == NULL);
|
||||
ASSERT_HOST(tess_cube_combiner_ == NULL);
|
||||
|
||||
// Create the cube context object
|
||||
cube_cntxt_ = CubeRecoContext::Create(this, tessdata_manager, &unicharset);
|
||||
if (cube_cntxt_ == NULL) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Tesseract::init_cube_objects()): Failed to "
|
||||
"instantiate CubeRecoContext\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create the combiner object and load the combiner net for target languages.
|
||||
if (load_combiner) {
|
||||
tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_);
|
||||
if (!tess_cube_combiner_->LoadCombinerNet()) {
|
||||
delete cube_cntxt_;
|
||||
cube_cntxt_ = NULL;
|
||||
delete tess_cube_combiner_;
|
||||
tess_cube_combiner_ = NULL;
|
||||
if (cube_debug_level > 0)
|
||||
tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name run_cube_combiner
|
||||
*
|
||||
* Iterates through tesseract's results and calls cube on each word,
|
||||
* combining the results with the existing tesseract result.
|
||||
*/
|
||||
void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
|
||||
if (page_res == NULL || tess_cube_combiner_ == NULL)
|
||||
return;
|
||||
PAGE_RES_IT page_res_it(page_res);
|
||||
// Iterate through the word results and call cube on each word.
|
||||
for (page_res_it.restart_page(); page_res_it.word() != NULL;
|
||||
page_res_it.forward()) {
|
||||
BLOCK* block = page_res_it.block()->block;
|
||||
if (block->poly_block() != NULL && !block->poly_block()->IsText())
|
||||
continue; // Don't deal with non-text blocks.
|
||||
WERD_RES* word = page_res_it.word();
|
||||
// Skip cube entirely if tesseract's certainty is greater than threshold.
|
||||
int combiner_run_thresh = convert_prob_to_tess_certainty(
|
||||
cube_cntxt_->Params()->CombinerRunThresh());
|
||||
if (word->best_choice->certainty() >= combiner_run_thresh) {
|
||||
continue;
|
||||
}
|
||||
// Use the same language as Tesseract used for the word.
|
||||
Tesseract* lang_tess = word->tesseract;
|
||||
|
||||
// Setup a trial WERD_RES in which to classify with cube.
|
||||
WERD_RES cube_word;
|
||||
cube_word.InitForRetryRecognition(*word);
|
||||
cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(),
|
||||
OEM_CUBE_ONLY,
|
||||
NULL, false, false, false,
|
||||
page_res_it.row()->row,
|
||||
page_res_it.block()->block);
|
||||
CubeObject *cube_obj = lang_tess->cube_recognize_word(
|
||||
page_res_it.block()->block, &cube_word);
|
||||
if (cube_obj != NULL)
|
||||
lang_tess->cube_combine_word(cube_obj, &cube_word, word);
|
||||
delete cube_obj;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_word_pass1
|
||||
*
|
||||
* Recognizes a single word using (only) cube. Compatible with
|
||||
* Tesseract's classify_word_pass1/classify_word_pass2.
|
||||
*/
|
||||
void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
|
||||
CubeObject *cube_obj = cube_recognize_word(block, word);
|
||||
delete cube_obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_recognize_word
|
||||
*
|
||||
* Cube recognizer to recognize a single word as with classify_word_pass1
|
||||
* but also returns the cube object in case the combiner is needed.
|
||||
*/
|
||||
CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) {
|
||||
if (!cube_binary_ || !cube_cntxt_) {
|
||||
if (cube_debug_level > 0 && !cube_binary_)
|
||||
tprintf("Tesseract::run_cube(): NULL binary image.\n");
|
||||
word->SetupFake(unicharset);
|
||||
return NULL;
|
||||
}
|
||||
TBOX word_box = word->word->bounding_box();
|
||||
if (block != NULL && (block->re_rotation().x() != 1.0f ||
|
||||
block->re_rotation().y() != 0.0f)) {
|
||||
// TODO(rays) We have to rotate the bounding box to get the true coords.
|
||||
// This will be achieved in the future via DENORM.
|
||||
// In the mean time, cube can't process this word.
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube can't process rotated word at:");
|
||||
word_box.print();
|
||||
}
|
||||
word->SetupFake(unicharset);
|
||||
return NULL;
|
||||
}
|
||||
CubeObject* cube_obj = new tesseract::CubeObject(
|
||||
cube_cntxt_, cube_binary_, word_box.left(),
|
||||
pixGetHeight(cube_binary_) - word_box.top(),
|
||||
word_box.width(), word_box.height());
|
||||
if (!cube_recognize(cube_obj, block, word)) {
|
||||
delete cube_obj;
|
||||
return NULL;
|
||||
}
|
||||
return cube_obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_combine_word
|
||||
*
|
||||
* Combines the cube and tesseract results for a single word, leaving the
|
||||
* result in tess_word.
|
||||
*/
|
||||
void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
|
||||
WERD_RES* tess_word) {
|
||||
float combiner_prob = tess_cube_combiner_->CombineResults(tess_word,
|
||||
cube_obj);
|
||||
// If combiner probability is greater than tess/cube combiner
|
||||
// classifier threshold, i.e. tesseract wins, then just return the
|
||||
// tesseract result unchanged, as the combiner knows nothing about how
|
||||
// correct the answer is. If cube and tesseract agree, then improve the
|
||||
// scores before returning.
|
||||
WERD_CHOICE* tess_best = tess_word->best_choice;
|
||||
WERD_CHOICE* cube_best = cube_word->best_choice;
|
||||
if (cube_debug_level || classify_debug_level) {
|
||||
tprintf("Combiner prob = %g vs threshold %g\n",
|
||||
combiner_prob, cube_cntxt_->Params()->CombinerClassifierThresh());
|
||||
}
|
||||
if (combiner_prob >=
|
||||
cube_cntxt_->Params()->CombinerClassifierThresh()) {
|
||||
if (tess_best->unichar_string() == cube_best->unichar_string()) {
|
||||
// Cube and tess agree, so improve the scores.
|
||||
tess_best->set_rating(tess_best->rating() / 2);
|
||||
tess_best->set_certainty(tess_best->certainty() / 2);
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Cube wins.
|
||||
// It is better for the language combiner to have all tesseract scores,
|
||||
// so put them in the cube result.
|
||||
cube_best->set_rating(tess_best->rating());
|
||||
cube_best->set_certainty(tess_best->certainty());
|
||||
if (cube_debug_level || classify_debug_level) {
|
||||
tprintf("Cube INFO: tesseract result replaced by cube: %s -> %s\n",
|
||||
tess_best->unichar_string().string(),
|
||||
cube_best->unichar_string().string());
|
||||
}
|
||||
tess_word->ConsumeWordResults(cube_word);
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_recognize
|
||||
*
|
||||
* Call cube on the current word, and write the result to word.
|
||||
* Sets up a fake result and returns false if something goes wrong.
|
||||
*/
|
||||
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
|
||||
WERD_RES *word) {
|
||||
// Run cube
|
||||
WordAltList *cube_alt_list = cube_obj->RecognizeWord();
|
||||
if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube returned nothing for word at:");
|
||||
word->word->bounding_box().print();
|
||||
}
|
||||
word->SetupFake(unicharset);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get cube's best result and its probability, mapped to tesseract's
|
||||
// certainty range
|
||||
char_32 *cube_best_32 = cube_alt_list->Alt(0);
|
||||
double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
|
||||
float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
|
||||
string cube_best_str;
|
||||
CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);
|
||||
|
||||
// Retrieve Cube's character bounding boxes and CharSamples,
|
||||
// corresponding to the most recent call to RecognizeWord().
|
||||
Boxa *char_boxes = NULL;
|
||||
CharSamp **char_samples = NULL;;
|
||||
int num_chars;
|
||||
if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)
|
||||
&& cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
|
||||
"cube state.\n");
|
||||
word->SetupFake(unicharset);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Convert cube's character bounding boxes to a BoxWord.
|
||||
BoxWord cube_box_word;
|
||||
TBOX tess_word_box = word->word->bounding_box();
|
||||
if (word->denorm.block() != NULL)
|
||||
tess_word_box.rotate(word->denorm.block()->re_rotation());
|
||||
bool box_word_success = create_cube_box_word(char_boxes, num_chars,
|
||||
tess_word_box,
|
||||
&cube_box_word);
|
||||
boxaDestroy(&char_boxes);
|
||||
if (!box_word_success) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
|
||||
"create cube BoxWord\n");
|
||||
}
|
||||
word->SetupFake(unicharset);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Fill tesseract result's fields with cube results
|
||||
fill_werd_res(cube_box_word, cube_best_str.c_str(), word);
|
||||
|
||||
// Create cube's best choice.
|
||||
BLOB_CHOICE** choices = new BLOB_CHOICE*[num_chars];
|
||||
for (int i = 0; i < num_chars; ++i) {
|
||||
UNICHAR_ID uch_id =
|
||||
cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel());
|
||||
choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty,
|
||||
-1, 0.0f, 0.0f, 0.0f, BCC_STATIC_CLASSIFIER);
|
||||
}
|
||||
word->FakeClassifyWord(num_chars, choices);
|
||||
// within a word, cube recognizes the word in reading order.
|
||||
word->best_choice->set_unichars_in_script_order(true);
|
||||
delete[] choices;
|
||||
delete[] char_samples;
|
||||
|
||||
// Some sanity checks
|
||||
ASSERT_HOST(word->best_choice->length() == word->reject_map.length());
|
||||
|
||||
if (cube_debug_level || classify_debug_level) {
|
||||
tprintf("Cube result: %s r=%g, c=%g\n",
|
||||
word->best_choice->unichar_string().string(),
|
||||
word->best_choice->rating(),
|
||||
word->best_choice->certainty());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name fill_werd_res
|
||||
*
|
||||
* Fill Tesseract's word result fields with cube's.
|
||||
*
|
||||
*/
|
||||
void Tesseract::fill_werd_res(const BoxWord& cube_box_word,
|
||||
const char* cube_best_str,
|
||||
WERD_RES* tess_werd_res) {
|
||||
delete tess_werd_res->box_word;
|
||||
tess_werd_res->box_word = new BoxWord(cube_box_word);
|
||||
tess_werd_res->box_word->ClipToOriginalWord(tess_werd_res->denorm.block(),
|
||||
tess_werd_res->word);
|
||||
// Fill text and remaining fields
|
||||
tess_werd_res->word->set_text(cube_best_str);
|
||||
tess_werd_res->tess_failed = FALSE;
|
||||
tess_werd_res->tess_accepted = tess_acceptable_word(tess_werd_res);
|
||||
// There is no output word, so we can' call AdaptableWord, but then I don't
|
||||
// think we need to. Fudge the result with accepted.
|
||||
tess_werd_res->tess_would_adapt = tess_werd_res->tess_accepted;
|
||||
|
||||
// Set word to done, i.e., ignore all of tesseract's tests for rejection
|
||||
tess_werd_res->done = tess_werd_res->tess_accepted;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
|
@ -0,0 +1,184 @@
|
|||
/**********************************************************************
|
||||
* File: cube_reco_context.cpp
|
||||
* Description: Implementation of the Cube Recognition Context Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include <limits.h>
|
||||
|
||||
#include "cube_reco_context.h"
|
||||
|
||||
#include "classifier_factory.h"
|
||||
#include "cube_tuning_params.h"
|
||||
#include "dict.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "tessdatamanager.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tess_lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**
|
||||
* Instantiate a CubeRecoContext object using a Tesseract object.
|
||||
* CubeRecoContext will not take ownership of tess_obj, but will
|
||||
* record the pointer to it and will make use of various Tesseract
|
||||
* components (language model, flags, etc). Thus the caller should
|
||||
* keep tess_obj alive so long as the instantiated CubeRecoContext is used.
|
||||
*/
|
||||
CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) {
|
||||
tess_obj_ = tess_obj;
|
||||
lang_ = "";
|
||||
loaded_ = false;
|
||||
lang_mod_ = NULL;
|
||||
params_ = NULL;
|
||||
char_classifier_ = NULL;
|
||||
char_set_ = NULL;
|
||||
word_size_model_ = NULL;
|
||||
char_bigrams_ = NULL;
|
||||
word_unigrams_ = NULL;
|
||||
noisy_input_ = false;
|
||||
size_normalization_ = false;
|
||||
}
|
||||
|
||||
CubeRecoContext::~CubeRecoContext() {
|
||||
delete char_classifier_;
|
||||
char_classifier_ = NULL;
|
||||
|
||||
delete word_size_model_;
|
||||
word_size_model_ = NULL;
|
||||
|
||||
delete char_set_;
|
||||
char_set_ = NULL;
|
||||
|
||||
delete char_bigrams_;
|
||||
char_bigrams_ = NULL;
|
||||
|
||||
delete word_unigrams_;
|
||||
word_unigrams_ = NULL;
|
||||
|
||||
delete lang_mod_;
|
||||
lang_mod_ = NULL;
|
||||
|
||||
delete params_;
|
||||
params_ = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the path of the data files by looking up the TESSDATA_PREFIX
|
||||
* environment variable and appending a "tessdata" directory to it
|
||||
*/
|
||||
bool CubeRecoContext::GetDataFilePath(string *path) const {
|
||||
*path = tess_obj_->datadir.string();
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* The object initialization function that loads all the necessary
|
||||
* components of a RecoContext. TessdataManager is used to load the
|
||||
* data from [lang].traineddata file. If TESSDATA_CUBE_UNICHARSET
|
||||
* component is present, Cube will be instantiated with the unicharset
|
||||
* specified in this component and the corresponding dictionary
|
||||
* (TESSDATA_CUBE_SYSTEM_DAWG), and will map Cube's unicharset to
|
||||
* Tesseract's. Otherwise, TessdataManager will assume that Cube will
|
||||
* be using Tesseract's unicharset and dawgs, and will load the
|
||||
* unicharset from the TESSDATA_UNICHARSET component and will load the
|
||||
* dawgs from TESSDATA_*_DAWG components.
|
||||
*/
|
||||
bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset) {
|
||||
ASSERT_HOST(tess_obj_ != NULL);
|
||||
tess_unicharset_ = tess_unicharset;
|
||||
string data_file_path;
|
||||
|
||||
// Get the data file path.
|
||||
if (GetDataFilePath(&data_file_path) == false) {
|
||||
fprintf(stderr, "Unable to get data file path\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the language from the Tesseract object.
|
||||
lang_ = tess_obj_->lang.string();
|
||||
|
||||
// Create the char set.
|
||||
if ((char_set_ =
|
||||
CharSet::Create(tessdata_manager, tess_unicharset)) == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
|
||||
"CharSet\n");
|
||||
return false;
|
||||
}
|
||||
// Create the language model.
|
||||
string lm_file_name = data_file_path + lang_ + ".cube.lm";
|
||||
string lm_params;
|
||||
if (!CubeUtils::ReadFileToString(lm_file_name, &lm_params)) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read cube "
|
||||
"language model params from %s\n", lm_file_name.c_str());
|
||||
return false;
|
||||
}
|
||||
lang_mod_ = new TessLangModel(lm_params, data_file_path,
|
||||
tess_obj_->getDict().load_system_dawg,
|
||||
tessdata_manager, this);
|
||||
|
||||
// Create the optional char bigrams object.
|
||||
char_bigrams_ = CharBigrams::Create(data_file_path, lang_);
|
||||
|
||||
// Create the optional word unigrams object.
|
||||
word_unigrams_ = WordUnigrams::Create(data_file_path, lang_);
|
||||
|
||||
// Create the optional size model.
|
||||
word_size_model_ = WordSizeModel::Create(data_file_path, lang_,
|
||||
char_set_, Contextual());
|
||||
|
||||
// Load tuning params.
|
||||
params_ = CubeTuningParams::Create(data_file_path, lang_);
|
||||
if (params_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read "
|
||||
"CubeTuningParams from %s\n", data_file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create the char classifier.
|
||||
char_classifier_ = CharClassifierFactory::Create(data_file_path, lang_,
|
||||
lang_mod_, char_set_,
|
||||
params_);
|
||||
if (char_classifier_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
|
||||
"CharClassifierFactory object from %s\n", data_file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
loaded_ = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Creates a CubeRecoContext object using a tesseract object */
|
||||
CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj,
|
||||
TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset) {
|
||||
// create the object
|
||||
CubeRecoContext *cntxt = new CubeRecoContext(tess_obj);
|
||||
// load the necessary components
|
||||
if (cntxt->Load(tessdata_manager, tess_unicharset) == false) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init "
|
||||
"CubeRecoContext object\n");
|
||||
delete cntxt;
|
||||
return NULL;
|
||||
}
|
||||
// success
|
||||
return cntxt;
|
||||
}
|
||||
} // tesseract}
|
|
@ -0,0 +1,157 @@
|
|||
/**********************************************************************
|
||||
* File: cube_reco_context.h
|
||||
* Description: Declaration of the Cube Recognition Context Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
|
||||
// (or a thread) would create one CubeRecoContext object per language.
|
||||
// The CubeRecoContext object also provides methods to get and set the
|
||||
// different attribues of the Cube OCR Engine.
|
||||
|
||||
#ifndef CUBE_RECO_CONTEXT_H
|
||||
#define CUBE_RECO_CONTEXT_H
|
||||
|
||||
#include <string>
|
||||
#include "neural_net.h"
|
||||
#include "lang_model.h"
|
||||
#include "classifier_base.h"
|
||||
#include "feature_base.h"
|
||||
#include "char_set.h"
|
||||
#include "word_size_model.h"
|
||||
#include "char_bigrams.h"
|
||||
#include "word_unigrams.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Tesseract;
|
||||
class TessdataManager;
|
||||
|
||||
class CubeRecoContext {
|
||||
public:
|
||||
// Reading order enum type
|
||||
enum ReadOrder {
|
||||
L2R,
|
||||
R2L
|
||||
};
|
||||
|
||||
// Instantiate using a Tesseract object
|
||||
CubeRecoContext(Tesseract *tess_obj);
|
||||
|
||||
~CubeRecoContext();
|
||||
|
||||
// accessor functions
|
||||
inline const string & Lang() const { return lang_; }
|
||||
inline CharSet *CharacterSet() const { return char_set_; }
|
||||
const UNICHARSET *TessUnicharset() const { return tess_unicharset_; }
|
||||
inline CharClassifier *Classifier() const { return char_classifier_; }
|
||||
inline WordSizeModel *SizeModel() const { return word_size_model_; }
|
||||
inline CharBigrams *Bigrams() const { return char_bigrams_; }
|
||||
inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
|
||||
inline TuningParams *Params() const { return params_; }
|
||||
inline LangModel *LangMod() const { return lang_mod_; }
|
||||
|
||||
// the reading order of the language
|
||||
inline ReadOrder ReadingOrder() const {
|
||||
return ((lang_ == "ara") ? R2L : L2R);
|
||||
}
|
||||
|
||||
// does the language support case
|
||||
inline bool HasCase() const {
|
||||
return (lang_ != "ara" && lang_ != "hin");
|
||||
}
|
||||
|
||||
inline bool Cursive() const {
|
||||
return (lang_ == "ara");
|
||||
}
|
||||
|
||||
inline bool HasItalics() const {
|
||||
return (lang_ != "ara" && lang_ != "hin");
|
||||
}
|
||||
|
||||
inline bool Contextual() const {
|
||||
return (lang_ == "ara");
|
||||
}
|
||||
|
||||
// RecoContext runtime flags accessor functions
|
||||
inline bool SizeNormalization() const { return size_normalization_; }
|
||||
inline bool NoisyInput() const { return noisy_input_; }
|
||||
inline bool OOD() const { return lang_mod_->OOD(); }
|
||||
inline bool Numeric() const { return lang_mod_->Numeric(); }
|
||||
inline bool WordList() const { return lang_mod_->WordList(); }
|
||||
inline bool Punc() const { return lang_mod_->Punc(); }
|
||||
inline bool CaseSensitive() const {
|
||||
return char_classifier_->CaseSensitive();
|
||||
}
|
||||
|
||||
inline void SetSizeNormalization(bool size_normalization) {
|
||||
size_normalization_ = size_normalization;
|
||||
}
|
||||
inline void SetNoisyInput(bool noisy_input) {
|
||||
noisy_input_ = noisy_input;
|
||||
}
|
||||
inline void SetOOD(bool ood_enabled) {
|
||||
lang_mod_->SetOOD(ood_enabled);
|
||||
}
|
||||
inline void SetNumeric(bool numeric_enabled) {
|
||||
lang_mod_->SetNumeric(numeric_enabled);
|
||||
}
|
||||
inline void SetWordList(bool word_list_enabled) {
|
||||
lang_mod_->SetWordList(word_list_enabled);
|
||||
}
|
||||
inline void SetPunc(bool punc_enabled) {
|
||||
lang_mod_->SetPunc(punc_enabled);
|
||||
}
|
||||
inline void SetCaseSensitive(bool case_sensitive) {
|
||||
char_classifier_->SetCaseSensitive(case_sensitive);
|
||||
}
|
||||
inline tesseract::Tesseract *TesseractObject() const {
|
||||
return tess_obj_;
|
||||
}
|
||||
|
||||
// Returns the path of the data files
|
||||
bool GetDataFilePath(string *path) const;
|
||||
// Creates a CubeRecoContext object using a tesseract object. Data
|
||||
// files are loaded via the tessdata_manager, and the tesseract
|
||||
// unicharset is provided in order to map Cube's unicharset to
|
||||
// Tesseract's in the case where the two unicharsets differ.
|
||||
static CubeRecoContext *Create(Tesseract *tess_obj,
|
||||
TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset);
|
||||
|
||||
private:
|
||||
bool loaded_;
|
||||
string lang_;
|
||||
CharSet *char_set_;
|
||||
UNICHARSET *tess_unicharset_;
|
||||
WordSizeModel *word_size_model_;
|
||||
CharClassifier *char_classifier_;
|
||||
CharBigrams *char_bigrams_;
|
||||
WordUnigrams *word_unigrams_;
|
||||
TuningParams *params_;
|
||||
LangModel *lang_mod_;
|
||||
Tesseract *tess_obj_; // CubeRecoContext does not own this pointer
|
||||
bool size_normalization_;
|
||||
bool noisy_input_;
|
||||
|
||||
// Loads and initialized all the necessary components of a
|
||||
// CubeRecoContext. See .cpp for more details.
|
||||
bool Load(TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_RECO_CONTEXT_H
|
|
@ -0,0 +1,134 @@
|
|||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: cubeclassifier.cpp
|
||||
// Description: Cube implementation of a ShapeClassifier.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Nov 23 10:39:45 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "cubeclassifier.h"
|
||||
|
||||
#include "char_altlist.h"
|
||||
#include "char_set.h"
|
||||
#include "cube_object.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "tessclassifier.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "trainingsample.h"
|
||||
#include "unicharset.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CubeClassifier::CubeClassifier(tesseract::Tesseract* tesseract)
|
||||
: cube_cntxt_(tesseract->GetCubeRecoContext()),
|
||||
shape_table_(*tesseract->shape_table()) {
|
||||
}
|
||||
CubeClassifier::~CubeClassifier() {
|
||||
}
|
||||
|
||||
/// Classifies the given [training] sample, writing to results.
|
||||
/// See ShapeClassifier for a full description.
|
||||
int CubeClassifier::UnicharClassifySample(
|
||||
const TrainingSample& sample, Pix* page_pix, int debug,
|
||||
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
|
||||
results->clear();
|
||||
if (page_pix == NULL) return 0;
|
||||
|
||||
ASSERT_HOST(cube_cntxt_ != NULL);
|
||||
const TBOX& char_box = sample.bounding_box();
|
||||
CubeObject* cube_obj = new tesseract::CubeObject(
|
||||
cube_cntxt_, page_pix, char_box.left(),
|
||||
pixGetHeight(page_pix) - char_box.top(),
|
||||
char_box.width(), char_box.height());
|
||||
CharAltList* alt_list = cube_obj->RecognizeChar();
|
||||
if (alt_list != NULL) {
|
||||
alt_list->Sort();
|
||||
CharSet* char_set = cube_cntxt_->CharacterSet();
|
||||
for (int i = 0; i < alt_list->AltCount(); ++i) {
|
||||
// Convert cube representation to a shape_id.
|
||||
int alt_id = alt_list->Alt(i);
|
||||
int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
|
||||
if (unichar_id >= 0)
|
||||
results->push_back(UnicharRating(unichar_id, alt_list->AltProb(i)));
|
||||
}
|
||||
delete alt_list;
|
||||
}
|
||||
delete cube_obj;
|
||||
return results->size();
|
||||
}
|
||||
|
||||
/** Provides access to the ShapeTable that this classifier works with. */
const ShapeTable* CubeClassifier::GetShapeTable() const {
  // The table is borrowed from the Tesseract instance given at construction.
  return &shape_table_;
}
|
||||
|
||||
// Constructs the combined classifier: Tesseract's classifier acts as a
// pruner whose candidates are then re-scored by cube. The context and shape
// table are borrowed from tesseract; pruner_ is owned by this class.
CubeTessClassifier::CubeTessClassifier(tesseract::Tesseract* tesseract)
    : cube_cntxt_(tesseract->GetCubeRecoContext()),
      shape_table_(*tesseract->shape_table()),
      pruner_(new TessClassifier(true, tesseract)) {
}
CubeTessClassifier::~CubeTessClassifier() {
  // Only the pruner is owned here; cube_cntxt_ and shape_table_ are not.
  delete pruner_;
}
|
||||
|
||||
/// Classifies the given [training] sample, writing to results.
/// See ShapeClassifier for a full description.
/// The Tesseract pruner supplies the candidate set; each candidate's rating
/// is then replaced by cube's best probability for that unichar, and the
/// results are re-sorted by the new ratings.
int CubeTessClassifier::UnicharClassifySample(
    const TrainingSample& sample, Pix* page_pix, int debug,
    UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
  int num_results = pruner_->UnicharClassifySample(sample, page_pix, debug,
                                                   keep_this, results);
  // Without the page image cube cannot run, so return the pruner's results.
  if (page_pix == NULL) return num_results;

  ASSERT_HOST(cube_cntxt_ != NULL);
  const TBOX& char_box = sample.bounding_box();
  // Cube uses a top-left image origin, hence the flipped y coordinate.
  CubeObject* cube_obj = new tesseract::CubeObject(
      cube_cntxt_, page_pix, char_box.left(),
      pixGetHeight(page_pix) - char_box.top(),
      char_box.width(), char_box.height());
  CharAltList* alt_list = cube_obj->RecognizeChar();
  CharSet* char_set = cube_cntxt_->CharacterSet();
  if (alt_list != NULL) {
    for (int r = 0; r < num_results; ++r) {
      // Get the best cube probability of the unichar in the result.
      // NOTE(review): a pruner result with no matching cube alternative
      // keeps a rating of 0.0, i.e. the pruner's own score is discarded —
      // confirm this is the intended behavior.
      double best_prob = 0.0;
      for (int i = 0; i < alt_list->AltCount(); ++i) {
        int alt_id = alt_list->Alt(i);
        int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
        if (unichar_id == (*results)[r].unichar_id &&
            alt_list->AltProb(i) > best_prob) {
          best_prob = alt_list->AltProb(i);
        }
      }
      (*results)[r].rating = best_prob;
    }
    delete alt_list;
    // Re-sort by rating.
    results->sort(&UnicharRating::SortDescendingRating);
  }
  delete cube_obj;
  return results->size();
}
|
||||
|
||||
/** Provides access to the ShapeTable that this classifier works with. */
const ShapeTable* CubeTessClassifier::GetShapeTable() const {
  // Borrowed from the Tesseract instance given at construction.
  return &shape_table_;
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: cubeclassifier.h
|
||||
// Description: Cube implementation of a ShapeClassifier.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Nov 23 10:36:32 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
|
||||
#define THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
|
||||
|
||||
#include "shapeclassifier.h"
|
||||
#include "platform.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Classify;
|
||||
class CubeRecoContext;
|
||||
class ShapeTable;
|
||||
class TessClassifier;
|
||||
class Tesseract;
|
||||
class TrainingSample;
|
||||
struct UnicharRating;
|
||||
|
||||
// Cube implementation of a ShapeClassifier.
class TESS_API CubeClassifier : public ShapeClassifier {
 public:
  explicit CubeClassifier(Tesseract* tesseract);
  virtual ~CubeClassifier();

  // Classifies the given [training] sample, writing to results.
  // Returns the number of ratings written (0 if page_pix is NULL).
  // See ShapeClassifier for a full description.
  virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
                                    int debug, UNICHAR_ID keep_this,
                                    GenericVector<UnicharRating>* results);
  // Provides access to the ShapeTable that this classifier works with.
  virtual const ShapeTable* GetShapeTable() const;

 private:
  // Cube objects. Both are borrowed from the Tesseract instance passed to
  // the constructor and are not owned by this class.
  CubeRecoContext* cube_cntxt_;
  const ShapeTable& shape_table_;
};
|
||||
|
||||
// Combination of Tesseract class pruner with scoring by cube.
class TESS_API CubeTessClassifier : public ShapeClassifier {
 public:
  explicit CubeTessClassifier(Tesseract* tesseract);
  virtual ~CubeTessClassifier();

  // Classifies the given [training] sample, writing to results.
  // The Tesseract pruner produces candidates, which are re-scored by cube.
  // See ShapeClassifier for a full description.
  virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
                                    int debug, UNICHAR_ID keep_this,
                                    GenericVector<UnicharRating>* results);
  // Provides access to the ShapeTable that this classifier works with.
  virtual const ShapeTable* GetShapeTable() const;

 private:
  // Cube objects. The context and shape table are borrowed from the
  // Tesseract instance; pruner_ is owned and deleted by this class.
  CubeRecoContext* cube_cntxt_;
  const ShapeTable& shape_table_;
  TessClassifier* pruner_;
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif /* THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_ */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,35 @@
|
|||
/******************************************************************
|
||||
* File: docqual.h (Formerly docqual.h)
|
||||
* Description: Document Quality Metrics
|
||||
* Author: Phil Cheatle
|
||||
* Created: Mon May 9 11:27:28 BST 1994
|
||||
*
|
||||
* (C) Copyright 1994, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef DOCQUAL_H
|
||||
#define DOCQUAL_H
|
||||
|
||||
#include "control.h"
|
||||
|
||||
// Per-word garbage assessment levels, in increasing order of badness.
// The exact crunch behavior per level is decided elsewhere in the docqual
// code (not visible here).
enum GARBAGE_LEVEL
{
  G_NEVER_CRUNCH,  // word is protected from crunching
  G_OK,            // acceptable text
  G_DODGY,         // suspicious
  G_TERRIBLE       // almost certainly garbage
};
|
||||
|
||||
inT16 word_blob_quality(WERD_RES *word, ROW *row);
|
||||
void reject_whole_page(PAGE_RES_IT &page_res_it);
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,278 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: equationdetect.h
|
||||
// Description: The equation detection class that inherits equationdetectbase.
|
||||
// Author: Zongyi (Joe) Liu (joeliu@google.com)
|
||||
// Created: Fri Aug 31 11:13:01 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_EQUATIONDETECT_H__
|
||||
#define TESSERACT_CCMAIN_EQUATIONDETECT_H__
|
||||
|
||||
#include "blobbox.h"
|
||||
#include "equationdetectbase.h"
|
||||
#include "genericvector.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "unichar.h"
|
||||
|
||||
class BLOBNBOX;
|
||||
class BLOB_CHOICE;
|
||||
class BLOB_CHOICE_LIST;
|
||||
class TO_BLOCK_LIST;
|
||||
class TBOX;
|
||||
class UNICHARSET;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Tesseract;
|
||||
class ColPartition;
|
||||
class ColPartitionGrid;
|
||||
class ColPartitionSet;
|
||||
|
||||
// Equation detector: finds displayed and inline equation regions in the
// page's ColPartitionGrid, using a dedicated tesseract engine trained on
// equation data plus layout heuristics (indentation, density, alignment).
class EquationDetect : public EquationDetectBase {
 public:
  EquationDetect(const char* equ_datapath,
                 const char* equ_language);
  ~EquationDetect();

  // How a partition is indented relative to the column it sits in.
  enum IndentType {
    NO_INDENT,
    LEFT_INDENT,
    RIGHT_INDENT,
    BOTH_INDENT,
    INDENT_TYPE_COUNT
  };

  // Reset the lang_tesseract_ pointer. This function should be called before
  // we do any detector work.
  void SetLangTesseract(Tesseract* lang_tesseract);

  // Iterate over the blobs inside to_block, and set the blobs that we want to
  // process to BSTT_NONE. (By default, they should be BSTT_SKIP). The function
  // returns 0 upon success.
  int LabelSpecialText(TO_BLOCK* to_block);

  // Find possible equation partitions from part_grid. Should be called
  // after the special_text_type of blobs are set.
  // It returns 0 upon success.
  int FindEquationParts(ColPartitionGrid* part_grid,
                        ColPartitionSet** best_columns);

  // Reset the resolution of the processing image. TEST only function.
  void SetResolution(const int resolution);

 protected:
  // Identify the special text type for one blob, and update its field. When
  // height_th is set (> 0), we will label the blob as BSTT_NONE if its height
  // is less than height_th.
  void IdentifySpecialText(BLOBNBOX *blob, const int height_th);

  // Estimate the type for one unichar.
  BlobSpecialTextType EstimateTypeForUnichar(
      const UNICHARSET& unicharset, const UNICHAR_ID id) const;

  // Compute special text type for each blob in part_grid_.
  void IdentifySpecialText();

  // Identify blobs that we want to skip during special blob type
  // classification.
  void IdentifyBlobsToSkip(ColPartition* part);

  // The ColPartitions in part_grid_ may be over-segmented, particularly in the
  // block equation regions. So we like to identify these partitions and merge
  // them before we do the searching.
  void MergePartsByLocation();

  // Starting from the seed center, we do radius search. And for partitions
  // that have large overlaps with seed, we remove them from part_grid_ and add
  // into parts_overlap. Note: this function may update the part_grid_, so if
  // the caller is also running ColPartitionGridSearch, use the
  // RepositionIterator to continue.
  void SearchByOverlap(ColPartition* seed,
                       GenericVector<ColPartition*>* parts_overlap);

  // Insert part back into part_grid_, after it absorbs some other parts.
  void InsertPartAfterAbsorb(ColPartition* part);

  // Identify the colpartitions in part_grid_, label them as PT_EQUATION, and
  // save them into cp_seeds_.
  void IdentifySeedParts();

  // Check the blobs count for a seed region candidate.
  bool CheckSeedBlobsCount(ColPartition* part);

  // Compute the foreground pixel density for a tbox area.
  float ComputeForegroundDensity(const TBOX& tbox);

  // Check if part fits the seed2 label: with low math density and left
  // indented. We are using two checks:
  // 1. If its left is aligned with any coordinates in indented_texts_left,
  // which we assume have been sorted.
  // 2. If its foreground density is over foreground_density_th.
  bool CheckForSeed2(
      const GenericVector<int>& indented_texts_left,
      const float foreground_density_th,
      ColPartition* part);

  // Count the number of values in sorted_vec that are close to val, used to
  // check if a partition is aligned with text partitions.
  int CountAlignment(
      const GenericVector<int>& sorted_vec, const int val) const;

  // Check for a seed candidate using the foreground pixel density. And we
  // return true if the density is below a certain threshold, because
  // characters in equation regions usually are apart with more white spaces.
  bool CheckSeedFgDensity(const float density_th, ColPartition* part);

  // A light version of SplitCPHor: instead of really doing the part split, we
  // simply compute the union bounding box of each split part.
  void SplitCPHorLite(ColPartition* part, GenericVector<TBOX>* splitted_boxes);

  // Split the part (horizontally), and save the split result into
  // parts_splitted. Note that it is the caller's responsibility to release the
  // memory owned by parts_splitted. On the other hand, the part is unchanged
  // during this process and still owns the blobs, so do NOT call DeleteBoxes
  // when freeing the colpartitions in parts_splitted.
  void SplitCPHor(ColPartition* part,
                  GenericVector<ColPartition*>* parts_splitted);

  // Check the density for a seed candidate (part) using its math density and
  // italic density, returns true if the check passed.
  bool CheckSeedDensity(const float math_density_high,
                        const float math_density_low,
                        const ColPartition* part) const;

  // Check if part is indented.
  IndentType IsIndented(ColPartition* part);

  // Identify inline partitions from cp_seeds_, and re-label them.
  void IdentifyInlineParts();

  // Compute the super bounding box for all colpartitions inside part_grid_.
  void ComputeCPsSuperBBox();

  // Identify inline partitions from cp_seeds_ using the horizontal search.
  void IdentifyInlinePartsHorizontal();

  // Estimate the line spacing between two text partitions. Returns -1 if not
  // enough data.
  int EstimateTextPartLineSpacing();

  // Identify inline partitions from cp_seeds_ using vertical search.
  void IdentifyInlinePartsVertical(const bool top_to_bottom,
                                   const int textPartsLineSpacing);

  // Check if part is an inline equation zone. This should be called after we
  // identified the seed regions.
  bool IsInline(const bool search_bottom,
                const int textPartsLineSpacing,
                ColPartition* part);

  // For a given seed partition, we search the part_grid_ and see if there is
  // any partition that can be merged with it. It returns true if the seed has
  // been expanded.
  bool ExpandSeed(ColPartition* seed);

  // Starting from the seed position, we search the part_grid_
  // horizontally/vertically, find all partitions that can be
  // merged with seed, remove them from part_grid_, and put them into
  // parts_to_merge.
  void ExpandSeedHorizontal(const bool search_left,
                            ColPartition* seed,
                            GenericVector<ColPartition*>* parts_to_merge);
  void ExpandSeedVertical(const bool search_bottom,
                          ColPartition* seed,
                          GenericVector<ColPartition*>* parts_to_merge);

  // Check if a part_box is the small neighbor of seed_box.
  bool IsNearSmallNeighbor(const TBOX& seed_box,
                           const TBOX& part_box) const;

  // Perform the density check for part, which we assume is nearing a seed
  // partition. It returns true if the check passed.
  bool CheckSeedNeighborDensity(const ColPartition* part) const;

  // After identifying the math blocks, we do one more scan over all text
  // partitions, and check if any of them is the satellite of
  // math blocks: here a p is the satellite of q if:
  // 1. q is the nearest vertical neighbor of p, and
  // 2. y_gap(p, q) is less than a threshold, and
  // 3. x_overlap(p, q) is over a threshold.
  // Note that p can be the satellite of two blocks: its top neighbor and
  // bottom neighbor.
  void ProcessMathBlockSatelliteParts();

  // Check if part is the satellite of one/two math blocks. If it is, we
  // return true, and save the blocks into math_blocks.
  bool IsMathBlockSatellite(
      ColPartition* part, GenericVector<ColPartition*>* math_blocks);

  // Search the nearest neighbor of part in one vertical direction as defined
  // in search_bottom. It returns the neighbor found that has major x overlap
  // with it, or NULL when not found.
  ColPartition* SearchNNVertical(const bool search_bottom,
                                 const ColPartition* part);

  // Check if the neighbor with vertical distance of y_gap is a near and math
  // block partition.
  bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const;

  // Generate the tiff file name for output/debug file.
  void GetOutputTiffName(const char* name, STRING* image_name) const;

  // Debugger function that renders ColPartitions on the input image, where:
  // parts labeled as PT_EQUATION will be painted in red, PT_INLINE_EQUATION
  // will be painted in green, and other parts will be painted in blue.
  void PaintColParts(const STRING& outfile) const;

  // Debugger function that renders the blobs in part_grid_ over the input
  // image.
  void PaintSpecialTexts(const STRING& outfile) const;

  // Debugger function that prints the math blob density values for a
  // ColPartition object.
  void PrintSpecialBlobsDensity(const ColPartition* part) const;

  // The tesseract engine initialized from equation training data.
  Tesseract equ_tesseract_;

  // The tesseract engine used for OCR. This pointer is passed in by the
  // caller, so do NOT destroy it in this class.
  Tesseract* lang_tesseract_;

  // The ColPartitionGrid that we are processing. This pointer is passed in
  // from the caller, so do NOT destroy it in the class.
  ColPartitionGrid* part_grid_;

  // A simple array of pointers to the best assigned column division at
  // each grid y coordinate. This pointer is passed in from the caller, so do
  // NOT destroy it in the class.
  ColPartitionSet** best_columns_;

  // The super bounding box of all cps in the part_grid_.
  TBOX* cps_super_bbox_;

  // The seed ColPartitions for equation regions.
  GenericVector<ColPartition*> cp_seeds_;

  // The resolution (dpi) of the processing image.
  int resolution_;

  // The number of pages we have processed.
  int page_count_;
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif  // TESSERACT_CCMAIN_EQUATIONDETECT_H__
|
|
@ -0,0 +1,876 @@
|
|||
/******************************************************************
|
||||
* File: fixspace.cpp (Formerly fixspace.c)
|
||||
* Description: Implements a pass over the page res, exploring the alternative
|
||||
* spacing possibilities, trying to use context to improve the
|
||||
* word spacing
|
||||
* Author: Phil Cheatle
|
||||
* Created: Thu Oct 21 11:38:43 BST 1993
|
||||
*
|
||||
* (C) Copyright 1993, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <ctype.h>
|
||||
#include "reject.h"
|
||||
#include "statistc.h"
|
||||
#include "control.h"
|
||||
#include "fixspace.h"
|
||||
#include "genblob.h"
|
||||
#include "tessvars.h"
|
||||
#include "tessbox.h"
|
||||
#include "globals.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
#define PERFECT_WERDS 999
|
||||
#define MAXSPACING 128 /*max expected spacing in pix */
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**
 * @name fix_fuzzy_spaces()
 * Walk over the page finding sequences of words joined by fuzzy spaces.
 * Extract them as a sublist, process the sublist to find the optimal
 * arrangement of spaces then replace the sublist in the ROW_RES.
 *
 * @param monitor progress monitor
 * @param word_count count of words in doc
 * @param[out] page_res
 */
void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor,
                                 inT32 word_count,
                                 PAGE_RES *page_res) {
  BLOCK_RES_IT block_res_it;
  ROW_RES_IT row_res_it;
  WERD_RES_IT word_res_it_from;
  WERD_RES_IT word_res_it_to;
  WERD_RES *word_res;
  WERD_RES_LIST fuzzy_space_words;
  inT16 new_length;
  BOOL8 prevent_null_wd_fixsp;   // DON'T process blobless wds
  inT32 word_index;              // current word

  block_res_it.set_to_list(&page_res->block_res_list);
  word_index = 0;
  for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list();
       block_res_it.forward()) {
    row_res_it.set_to_list(&block_res_it.data()->row_res_list);
    for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
         row_res_it.forward()) {
      word_res_it_from.set_to_list(&row_res_it.data()->word_res_list);
      while (!word_res_it_from.at_last()) {
        word_res = word_res_it_from.data();
        // Advance over words NOT followed by a fuzzy space, fixing their
        // spacing individually; also honor the cancel/deadline monitor.
        while (!word_res_it_from.at_last() &&
               !(word_res->combination ||
                 word_res_it_from.data_relative(1)->word->flag(W_FUZZY_NON) ||
                 word_res_it_from.data_relative(1)->word->flag(W_FUZZY_SP))) {
          fix_sp_fp_word(word_res_it_from, row_res_it.data()->row,
                         block_res_it.data()->block);
          word_res = word_res_it_from.forward();
          word_index++;
          if (monitor != NULL) {
            monitor->ocr_alive = TRUE;
            monitor->progress = 90 + 5 * word_index / word_count;
            if (monitor->deadline_exceeded() ||
                (monitor->cancel != NULL &&
                 (*monitor->cancel)(monitor->cancel_this, stats_.dict_words)))
              return;
          }
        }

        if (!word_res_it_from.at_last()) {
          // A fuzzy-space run starts here: extend [from, to] over the whole
          // run, noting whether any word in it has no blobs.
          word_res_it_to = word_res_it_from;
          prevent_null_wd_fixsp =
              word_res->word->cblob_list()->empty();
          if (check_debug_pt(word_res, 60))
            debug_fix_space_level.set_value(10);
          word_res_it_to.forward();
          word_index++;
          if (monitor != NULL) {
            monitor->ocr_alive = TRUE;
            monitor->progress = 90 + 5 * word_index / word_count;
            if (monitor->deadline_exceeded() ||
                (monitor->cancel != NULL &&
                 (*monitor->cancel)(monitor->cancel_this, stats_.dict_words)))
              return;
          }
          while (!word_res_it_to.at_last() &&
                 (word_res_it_to.data_relative(1)->word->flag(W_FUZZY_NON) ||
                  word_res_it_to.data_relative(1)->word->flag(W_FUZZY_SP))) {
            if (check_debug_pt(word_res, 60))
              debug_fix_space_level.set_value(10);
            if (word_res->word->cblob_list()->empty())
              prevent_null_wd_fixsp = TRUE;
            word_res = word_res_it_to.forward();
          }
          if (check_debug_pt(word_res, 60))
            debug_fix_space_level.set_value(10);
          if (word_res->word->cblob_list()->empty())
            prevent_null_wd_fixsp = TRUE;
          if (prevent_null_wd_fixsp) {
            // Skip the whole run: blobless words must not be re-spaced.
            word_res_it_from = word_res_it_to;
          }
          else {
            // Detach the run, optimize its spacing, then splice it back and
            // advance the iterator past the (possibly re-sized) sublist.
            fuzzy_space_words.assign_to_sublist(&word_res_it_from,
                                                &word_res_it_to);
            fix_fuzzy_space_list(fuzzy_space_words,
                                 row_res_it.data()->row,
                                 block_res_it.data()->block);
            new_length = fuzzy_space_words.length();
            word_res_it_from.add_list_before(&fuzzy_space_words);
            for (;
                 !word_res_it_from.at_last() && new_length > 0;
                 new_length--) {
              word_res_it_from.forward();
            }
          }
          if (test_pt)
            debug_fix_space_level.set_value(0);
        }
        fix_sp_fp_word(word_res_it_from, row_res_it.data()->row,
                       block_res_it.data()->block);
        // Last word in row
      }
    }
  }
}
|
||||
|
||||
/**
 * Optimize the spacing of a run of fuzzy-space-joined words. Scores the
 * incoming arrangement, then iterates over candidate permutations produced
 * by transform_to_next_perm(), keeping the highest-scoring one in best_perm.
 * Stops as soon as a perfect score is reached or the permutations run out.
 */
void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm,
                                     ROW *row,
                                     BLOCK* block) {
  WERD_RES_LIST trial_perm;
  BOOL8 improved = FALSE;

  // The incoming arrangement sets the score to beat.
  inT16 best_score = eval_word_spacing(best_perm);
  dump_words(best_perm, best_score, 1, improved);

  if (best_score != PERFECT_WERDS)
    initialise_search(best_perm, trial_perm);

  while (best_score != PERFECT_WERDS && !trial_perm.empty()) {
    match_current_words(trial_perm, row, block);
    inT16 trial_score = eval_word_spacing(trial_perm);
    dump_words(trial_perm, trial_score, 2, improved);
    if (trial_score > best_score) {
      // New best: replace best_perm with a deep copy of the trial.
      best_perm.clear();
      best_perm.deep_copy(&trial_perm, &WERD_RES::deep_copy);
      best_score = trial_score;
      improved = TRUE;
    }
    if (trial_score < PERFECT_WERDS)
      transform_to_next_perm(trial_perm);
  }
  dump_words(best_perm, best_score, 3, improved);
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
/**
 * Build new_list as a working copy of src_list for the spacing search.
 * Combination words are skipped; every copied word has its combination
 * flags cleared so the search starts from plain words.
 */
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) {
  WERD_RES_IT source_it(&src_list);
  WERD_RES_IT target_it(&new_list);

  for (source_it.mark_cycle_pt(); !source_it.cycled_list();
       source_it.forward()) {
    WERD_RES *word = source_it.data();
    if (word->combination)
      continue;
    WERD_RES *copy = WERD_RES::deep_copy(word);
    copy->combination = FALSE;
    copy->part_of_combo = FALSE;
    target_it.add_after_then_move(copy);
  }
}
|
||||
|
||||
|
||||
namespace tesseract {
|
||||
/**
 * Re-classify (pass 2) every word in words that is not part of a
 * combination and has not yet been recognized (box_word == NULL).
 * prev_word_best_choice_ is kept up to date for each word in turn.
 */
void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
                                    BLOCK* block) {
  WERD_RES_IT word_it(&words);
  WERD_RES *word;
  // Since we are not using PAGE_RES to iterate over words, we need to update
  // prev_word_best_choice_ before calling classify_word_pass2().
  prev_word_best_choice_ = NULL;
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
    word = word_it.data();
    if ((!word->part_of_combo) && (word->box_word == NULL)) {
      WordData word_data(block, row, word);
      SetupWordPassN(2, &word_data);
      classify_word_and_language(2, NULL, &word_data);
    }
    // Updated for every word, classified or not, so the next iteration sees
    // the true previous choice.
    prev_word_best_choice_ = word->best_choice;
  }
}
|
||||
|
||||
/**
 * @name eval_word_spacing()
 * The basic measure is the number of characters in contextually confirmed
 * words. (I.e the word is done)
 * If all words are contextually confirmed the evaluation is deemed perfect.
 *
 * Some fiddles are done to handle "1"s as these are VERY frequent causes of
 * fuzzy spaces. The problem with the basic measure is that "561 63" would
 * score the same as "56163", though given our knowledge that the space is
 * fuzzy, and that there is a "1" next to the fuzzy space, we need to ensure
 * that "56163" is preferred.
 *
 * The solution is to NOT COUNT the score of any word which has a digit at one
 * end and a "1Il" as the character the other side of the space.
 *
 * Conversely, any character next to a "1" within a word is counted as a
 * positive score. Thus "561 63" would score 4 (3 chars in a numeric word plus
 * 1 side of the "1" joined). "56163" would score 7 - all chars in a numeric
 * word + 2 sides of a "1" joined.
 *
 * The joined 1 rule is applied to any word REGARDLESS of contextual
 * confirmation. Thus "PS7a71 3/7a" scores 1 (neither word is contextually
 * confirmed. The only score is from the joined 1. "PS7a713/7a" scores 2.
 *
 */
inT16 Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
  WERD_RES_IT word_res_it(&word_res_list);
  inT16 total_score = 0;
  inT16 word_count = 0;
  inT16 done_word_count = 0;
  inT16 word_len;
  inT16 i;
  inT16 offset;
  WERD_RES *word;                 // current word
  inT16 prev_word_score = 0;
  BOOL8 prev_word_done = FALSE;
  BOOL8 prev_char_1 = FALSE;      // prev ch a "1/I/l"?
  BOOL8 prev_char_digit = FALSE;  // prev ch 2..9 or 0
  BOOL8 current_char_1 = FALSE;
  BOOL8 current_word_ok_so_far;
  STRING punct_chars = "!\"`',.:;";
  BOOL8 prev_char_punct = FALSE;
  BOOL8 current_char_punct = FALSE;
  BOOL8 word_done = FALSE;

  // Words are scored one step behind: a word's score is only added to the
  // total when the NEXT word shows it was not split across a digit/"1Il"
  // boundary.
  do {
    word = word_res_it.data();
    word_done = fixspace_thinks_word_done(word);
    word_count++;
    if (word->tess_failed) {
      // Failed words contribute nothing themselves; flush the pending
      // previous-word score and reset the lookbehind state.
      total_score += prev_word_score;
      if (prev_word_done)
        done_word_count++;
      prev_word_score = 0;
      prev_char_1 = FALSE;
      prev_char_digit = FALSE;
      prev_word_done = FALSE;
    }
    else {
      /*
        Can we add the prev word score and potentially count this word?
        Yes IF it didn't end in a 1 when the first char of this word is a digit
        AND it didn't end in a digit when the first char of this word is a 1
      */
      word_len = word->reject_map.length();
      current_word_ok_so_far = FALSE;
      if (!((prev_char_1 && digit_or_numeric_punct(word, 0)) ||
            (prev_char_digit && (
             (word_done &&
              word->best_choice->unichar_lengths().string()[0] == 1 &&
              word->best_choice->unichar_string()[0] == '1') ||
             (!word_done && STRING(conflict_set_I_l_1).contains(
                  word->best_choice->unichar_string()[0])))))) {
        total_score += prev_word_score;
        if (prev_word_done)
          done_word_count++;
        current_word_ok_so_far = word_done;
      }

      if (current_word_ok_so_far) {
        prev_word_done = TRUE;
        prev_word_score = word_len;
      }
      else {
        prev_word_done = FALSE;
        prev_word_score = 0;
      }

      /* Add 1 to total score for every joined 1 regardless of context and
         rejtn */
      for (i = 0, prev_char_1 = FALSE; i < word_len; i++) {
        current_char_1 = word->best_choice->unichar_string()[i] == '1';
        if (prev_char_1 || (current_char_1 && (i > 0)))
          total_score++;
        prev_char_1 = current_char_1;
      }

      /* Add 1 to total score for every joined punctuation regardless of
        context and rejtn */
      if (tessedit_prefer_joined_punct) {
        for (i = 0, offset = 0, prev_char_punct = FALSE; i < word_len;
             offset += word->best_choice->unichar_lengths()[i++]) {
          current_char_punct =
              punct_chars.contains(word->best_choice->unichar_string()[offset]);
          if (prev_char_punct || (current_char_punct && i > 0))
            total_score++;
          prev_char_punct = current_char_punct;
        }
      }
      prev_char_digit = digit_or_numeric_punct(word, word_len - 1);
      // Empty-bodied loop: walks offset to the byte position of the LAST
      // character (unichars are variable-width).
      for (i = 0, offset = 0; i < word_len - 1;
           offset += word->best_choice->unichar_lengths()[i++]);
      prev_char_1 =
          ((word_done && (word->best_choice->unichar_string()[offset] == '1'))
           || (!word_done && STRING(conflict_set_I_l_1).contains(
               word->best_choice->unichar_string()[offset])));
    }
    /* Find next word */
    do {
      word_res_it.forward();
    } while (word_res_it.data()->part_of_combo);
  } while (!word_res_it.at_first());
  // Flush the final pending word score.
  total_score += prev_word_score;
  if (prev_word_done)
    done_word_count++;
  if (done_word_count == word_count)
    return PERFECT_WERDS;
  else
    return total_score;
}
|
||||
|
||||
/**
 * Return TRUE if the unichar at char_position in word's best choice is a
 * digit, or is numeric punctuation in a word permuted as a number.
 */
BOOL8 Tesseract::digit_or_numeric_punct(WERD_RES *word, int char_position) {
  // Unichars are variable-width: walk the per-character lengths to find the
  // byte offset of the requested character position.
  int index = 0;
  int offset = 0;
  while (index < char_position) {
    offset += word->best_choice->unichar_lengths()[index++];
  }
  if (word->uch_set->get_isdigit(
          word->best_choice->unichar_string().string() + offset,
          word->best_choice->unichar_lengths()[index]))
    return TRUE;
  return (word->best_choice->permuter() == NUMBER_PERM &&
          STRING(numeric_punctuation).contains(
              word->best_choice->unichar_string().string()[offset]));
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
|
||||
/**
|
||||
* @name transform_to_next_perm()
|
||||
* Examines the current word list to find the smallest word gap size. Then walks
|
||||
* the word list closing any gaps of this size by either inserted new
|
||||
* combination words, or extending existing ones.
|
||||
*
|
||||
* The routine COULD be limited to stop it building words longer than N blobs.
|
||||
*
|
||||
* If there are no more gaps then it DELETES the entire list and returns the
|
||||
* empty list to cause termination.
|
||||
*/
|
||||
void transform_to_next_perm(WERD_RES_LIST &words) {
  WERD_RES_IT word_it(&words);
  WERD_RES_IT prev_word_it(&words);
  WERD_RES *word;
  WERD_RES *prev_word;
  WERD_RES *combo;
  WERD *copy_word;
  inT16 prev_right = -MAX_INT16;  // sentinel: no previous word seen yet
  TBOX box;
  inT16 gap;
  inT16 min_gap = MAX_INT16;

  // Pass 1: find the smallest horizontal gap between consecutive words,
  // ignoring words that have already been absorbed into a combination.
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
    word = word_it.data();
    if (!word->part_of_combo) {
      box = word->word->bounding_box();
      if (prev_right > -MAX_INT16) {
        gap = box.left() - prev_right;
        if (gap < min_gap)
          min_gap = gap;
      }
      prev_right = box.right();
    }
  }
  // Pass 2: close every gap of (at most) that minimum size by merging the
  // neighbouring words into combination words.  If no gap was found, the
  // list is cleared to signal termination to the caller.
  if (min_gap < MAX_INT16) {
    prev_right = -MAX_INT16;  // back to start
    word_it.set_to_list(&words);
    // Note: we can't use cycle_pt due to inserted combos at start of list.
    for (; (prev_right == -MAX_INT16) || !word_it.at_first();
         word_it.forward()) {
      word = word_it.data();
      if (!word->part_of_combo) {
        box = word->word->bounding_box();
        if (prev_right > -MAX_INT16) {
          gap = box.left() - prev_right;
          if (gap <= min_gap) {
            // Gap small enough to close: merge this word into the previous
            // word's combination, creating the combination if necessary.
            prev_word = prev_word_it.data();
            if (prev_word->combination) {
              combo = prev_word;
            }
            else {
              /* Make a new combination and insert before
               * the first word being joined. */
              copy_word = new WERD;
              *copy_word = *(prev_word->word);
              // deep copy
              combo = new WERD_RES(copy_word);
              combo->combination = TRUE;
              combo->x_height = prev_word->x_height;
              prev_word->part_of_combo = TRUE;
              prev_word_it.add_before_then_move(combo);
            }
            combo->word->set_flag(W_EOL, word->word->flag(W_EOL));
            if (word->combination) {
              combo->word->join_on(word->word);
              // Move blobs to combo
              // old combo no longer needed
              delete word_it.extract();
            }
            else {
              // Copy current wd to combo
              combo->copy_on(word);
              word->part_of_combo = TRUE;
            }
            // The merged result must be re-recognized from scratch.
            combo->done = FALSE;
            combo->ClearResults();
          }
          else {
            prev_word_it = word_it;  // catch up
          }
        }
        prev_right = box.right();
      }
    }
  }
  else {
    words.clear();  // signal termination
  }
}
|
||||
|
||||
namespace tesseract {
|
||||
// Debug helper: prints the word list in one of three modes (1 = extracted,
// 2 = tested, 3 = returned) depending on debug_fix_space_level.  Mode 1 also
// snapshots the text into stats_.dump_words_str so an eventual improvement
// can be reported as a before/after pair.
void Tesseract::dump_words(WERD_RES_LIST &perm, inT16 score,
                           inT16 mode, BOOL8 improved) {
  WERD_RES_IT it(&perm);

  if (debug_fix_space_level <= 0)
    return;

  if (mode == 1) {
    // Snapshot the pre-fix text for later "FIX SPACING" reporting.
    stats_.dump_words_str = "";
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      if (!it.data()->part_of_combo) {
        stats_.dump_words_str += it.data()->best_choice->unichar_string();
        stats_.dump_words_str += ' ';
      }
    }
  }

  if (debug_fix_space_level > 1) {
    if (mode == 1)
      tprintf("EXTRACTED (%d): \"", score);
    else if (mode == 2)
      tprintf("TESTED (%d): \"", score);
    else if (mode == 3)
      tprintf("RETURNED (%d): \"", score);

    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      if (!it.data()->part_of_combo) {
        tprintf("%s/%1d ",
                it.data()->best_choice->unichar_string().string(),
                (int)it.data()->best_choice->permuter());
      }
    }
    tprintf("\"\n");
  }
  else if (improved) {
    tprintf("FIX SPACING \"%s\" => \"", stats_.dump_words_str.string());
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
      if (!it.data()->part_of_combo) {
        tprintf("%s/%1d ",
                it.data()->best_choice->unichar_string().string(),
                (int)it.data()->best_choice->permuter());
      }
    }
    tprintf("\"\n");
  }
}
|
||||
|
||||
// Returns TRUE if the word is sufficiently trusted for spacing purposes.
BOOL8 Tesseract::fixspace_thinks_word_done(WERD_RES *word) {
  if (word->done)
    return TRUE;

  /*
    Use all the standard pass 2 conditions for mode 5 in set_done() in
    reject.c BUT DON'T REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DON'T
    CARE WHETHER WE HAVE of/at on/an etc.
  */
  if (fixsp_done_mode <= 0)
    return FALSE;

  // Acceptance depends on the done-mode: mode 3 accepts anything, mode 2
  // also accepts fully unrejected words, any mode accepts tess_accepted.
  BOOL8 acceptable = word->tess_accepted ||
                     (fixsp_done_mode == 2 &&
                      word->reject_map.reject_count() == 0) ||
                     fixsp_done_mode == 3;
  if (!acceptable)
    return FALSE;

  // Words containing an embedded space are never considered done.
  if (strchr(word->best_choice->unichar_string().string(), ' ') != NULL)
    return FALSE;

  // Finally the word must come from a dictionary or number permuter.
  return word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
         word->best_choice->permuter() == FREQ_DAWG_PERM ||
         word->best_choice->permuter() == USER_DAWG_PERM ||
         word->best_choice->permuter() == NUMBER_PERM;
}
|
||||
|
||||
|
||||
/**
|
||||
* @name fix_sp_fp_word()
|
||||
* Test the current word to see if it can be split by deleting noise blobs. If
|
||||
* so, do the business.
|
||||
* Return with the iterator pointing to the same place if the word is unchanged,
|
||||
* or the last of the replacement words.
|
||||
*/
|
||||
void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row,
                               BLOCK* block) {
  WERD_RES *word_res;
  WERD_RES_LIST sub_word_list;
  WERD_RES_IT sub_word_list_it(&sub_word_list);
  inT16 blob_index;
  inT16 new_length;
  float junk;  // noise score is not needed here, only the blob index

  word_res = word_res_it.data();
  // Only plain, unchopped, non-combination words are candidates.
  if (word_res->word->flag(W_REP_CHAR) ||
      word_res->combination ||
      word_res->part_of_combo ||
      !word_res->word->flag(W_DONT_CHOP))
    return;

  // No splittable noise blob => nothing to do.
  blob_index = worst_noise_blob(word_res, &junk);
  if (blob_index < 0)
    return;

  if (debug_fix_space_level > 1) {
    tprintf("FP fixspace working on \"%s\"\n",
            word_res->best_choice->unichar_string().string());
  }
  // Sort rejected blobs left-to-right so they can be redistributed when the
  // word is split.
  word_res->word->rej_cblob_list()->sort(c_blob_comparator);
  // Pull the word out of the main list, let fix_noisy_space_list() replace
  // it with one or more words, then splice the result back in.
  sub_word_list_it.add_after_stay_put(word_res_it.extract());
  fix_noisy_space_list(sub_word_list, row, block);
  new_length = sub_word_list.length();
  word_res_it.add_list_before(&sub_word_list);
  // Advance the caller's iterator to the last of the replacement words.
  for (; !word_res_it.at_last() && new_length > 1; new_length--) {
    word_res_it.forward();
  }
}
|
||||
|
||||
// Iteratively splits the noisiest blob out of the (single-word) list,
// re-recognizes, and keeps the best-scoring permutation in best_perm.
void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row,
                                     BLOCK* block) {
  inT16 best_score;
  WERD_RES_IT best_perm_it(&best_perm);
  WERD_RES_LIST current_perm;
  WERD_RES_IT current_perm_it(&current_perm);
  WERD_RES *old_word_res;
  inT16 current_score;
  BOOL8 improved = FALSE;

  best_score = fp_eval_word_spacing(best_perm);  // default score

  dump_words(best_perm, best_score, 1, improved);

  old_word_res = best_perm_it.data();
  // Even deep_copy doesn't copy the underlying WERD unless its combination
  // flag is true!.
  old_word_res->combination = TRUE;  // Kludge to force deep copy
  current_perm_it.add_to_end(WERD_RES::deep_copy(old_word_res));
  old_word_res->combination = FALSE;  // Undo kludge

  break_noisiest_blob_word(current_perm);

  // Keep splitting and re-matching until a perfect score is reached or
  // break_noisiest_blob_word() empties the list (no more noise blobs).
  while (best_score != PERFECT_WERDS && !current_perm.empty()) {
    match_current_words(current_perm, row, block);
    current_score = fp_eval_word_spacing(current_perm);
    dump_words(current_perm, current_score, 2, improved);
    if (current_score > best_score) {
      // New best: replace best_perm with a deep copy of current_perm.
      best_perm.clear();
      best_perm.deep_copy(&current_perm, &WERD_RES::deep_copy);
      best_score = current_score;
      improved = TRUE;
    }
    if (current_score < PERFECT_WERDS) {
      break_noisiest_blob_word(current_perm);
    }
  }
  dump_words(best_perm, best_score, 3, improved);
}
|
||||
|
||||
|
||||
/**
|
||||
* break_noisiest_blob_word()
|
||||
* Find the word with the blob which looks like the worst noise.
|
||||
* Break the word into two, deleting the noise blob.
|
||||
*/
|
||||
void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
  WERD_RES_IT word_it(&words);
  WERD_RES_IT worst_word_it;
  float worst_noise_score = 9999;
  int worst_blob_index = -1;  // Noisiest blob of noisiest wd
  int blob_index;  // of wds noisiest blob
  float noise_score;  // of wds noisiest blob
  WERD_RES *word_res;
  C_BLOB_IT blob_it;
  C_BLOB_IT rej_cblob_it;
  C_BLOB_LIST new_blob_list;
  C_BLOB_IT new_blob_it;
  C_BLOB_IT new_rej_cblob_it;
  WERD *new_word;
  inT16 start_of_noise_blob;
  inT16 i;

  // Find the word whose worst blob has the lowest (most noise-like) score.
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
    blob_index = worst_noise_blob(word_it.data(), &noise_score);
    if (blob_index > -1 && worst_noise_score > noise_score) {
      worst_noise_score = noise_score;
      worst_blob_index = blob_index;
      worst_word_it = word_it;
    }
  }
  if (worst_blob_index < 0) {
    words.clear();  // signal termination
    return;
  }

  /* Now split the worst_word_it */

  word_res = worst_word_it.data();

  /* Move blobs before noise blob to a new bloblist */

  new_blob_it.set_to_list(&new_blob_list);
  blob_it.set_to_list(word_res->word->cblob_list());
  for (i = 0; i < worst_blob_index; i++, blob_it.forward()) {
    new_blob_it.add_after_then_move(blob_it.extract());
  }
  // Remember where the noise blob started so rejected blobs can be
  // redistributed, then discard it.
  start_of_noise_blob = blob_it.data()->bounding_box().left();
  delete blob_it.extract();  // throw out noise blob

  // Build the left-hand word from the moved blobs; the original word keeps
  // the remaining (right-hand) blobs.
  new_word = new WERD(&new_blob_list, word_res->word);
  new_word->set_flag(W_EOL, FALSE);
  word_res->word->set_flag(W_BOL, FALSE);
  word_res->word->set_blanks(1);  // After break

  // Rejected blobs left of the split point (list is pre-sorted by the
  // caller) follow the new left-hand word.
  new_rej_cblob_it.set_to_list(new_word->rej_cblob_list());
  rej_cblob_it.set_to_list(word_res->word->rej_cblob_list());
  for (;
       (!rej_cblob_it.empty() &&
        (rej_cblob_it.data()->bounding_box().left() < start_of_noise_blob));
       rej_cblob_it.forward()) {
    new_rej_cblob_it.add_after_then_move(rej_cblob_it.extract());
  }

  WERD_RES* new_word_res = new WERD_RES(new_word);
  new_word_res->combination = TRUE;
  worst_word_it.add_before_then_move(new_word_res);

  // The shortened original word must be re-recognized.
  word_res->ClearResults();
}
|
||||
|
||||
// Returns the index of the most noise-like blob in the word, or -1 if no
// suitable candidate exists.  Blobs too close to either end of the word
// (fewer than fixsp_non_noise_limit solid blobs beyond them) are excluded.
// On success *worst_noise_score receives that blob's noise score.
inT16 Tesseract::worst_noise_blob(WERD_RES *word_res,
                                  float *worst_noise_score) {
  float noise_score[512];
  int i;
  int min_noise_blob;  // 1st contender
  int max_noise_blob;  // last contender
  int non_noise_count;
  int worst_noise_blob;  // Worst blob
  float small_limit = kBlnXHeight * fixsp_small_outlines_size;
  float non_noise_limit = kBlnXHeight * 0.8;

  if (word_res->rebuild_word == NULL)
    return -1;  // Can't handle cube words.

  // Normalised.
  int blob_count = word_res->box_word->length();
  ASSERT_HOST(blob_count <= 512);  // noise_score[] capacity
  if (blob_count < 5)
    return -1;  // too short to split

  /* Get the noise scores for all blobs */

#ifndef SECURE_NAMES
  if (debug_fix_space_level > 5)
    tprintf("FP fixspace Noise metrics for \"%s\": ",
            word_res->best_choice->unichar_string().string());
#endif

  // Accepted blobs are assumed non-noise; only unaccepted ones are scored.
  for (i = 0; i < blob_count && i < word_res->rebuild_word->NumBlobs(); i++) {
    TBLOB* blob = word_res->rebuild_word->blobs[i];
    if (word_res->reject_map[i].accepted())
      noise_score[i] = non_noise_limit;
    else
      noise_score[i] = blob_noise_score(blob);

    if (debug_fix_space_level > 5)
      tprintf("%1.1f ", noise_score[i]);
  }
  if (debug_fix_space_level > 5)
    tprintf("\n");

  /* Now find the worst one which is far enough away from the end of the word */

  // Scan from the left until fixsp_non_noise_limit solid blobs are seen;
  // i then marks the first allowable candidate position.
  non_noise_count = 0;
  for (i = 0; i < blob_count && non_noise_count < fixsp_non_noise_limit; i++) {
    if (noise_score[i] >= non_noise_limit) {
      non_noise_count++;
    }
  }
  if (non_noise_count < fixsp_non_noise_limit)
    return -1;

  min_noise_blob = i;

  // Same scan from the right for the last allowable candidate position.
  non_noise_count = 0;
  for (i = blob_count - 1; i >= 0 && non_noise_count < fixsp_non_noise_limit;
       i--) {
    if (noise_score[i] >= non_noise_limit) {
      non_noise_count++;
    }
  }
  if (non_noise_count < fixsp_non_noise_limit)
    return -1;

  max_noise_blob = i;

  if (min_noise_blob > max_noise_blob)
    return -1;

  // Pick the lowest-scoring blob in the allowed range, but only if it is
  // below small_limit (i.e. actually looks like noise).
  *worst_noise_score = small_limit;
  worst_noise_blob = -1;
  for (i = min_noise_blob; i <= max_noise_blob; i++) {
    if (noise_score[i] < *worst_noise_score) {
      worst_noise_blob = i;
      *worst_noise_score = noise_score[i];
    }
  }
  return worst_noise_blob;
}
|
||||
|
||||
// Computes a noise score for a blob: the size (larger of width and height)
// of its largest outline, doubled when the blob has many outlines and
// halved when the blob sits well above or below the text body.  Small
// scores mean "looks like noise".
float Tesseract::blob_noise_score(TBLOB *blob) {
  inT16 largest_dimension = 0;
  inT16 num_outlines = 0;

  for (TESSLINE* outline = blob->outlines; outline != NULL;
       outline = outline->next) {
    num_outlines++;
    TBOX outline_box = outline->bounding_box();
    inT16 dimension = (outline_box.height() > outline_box.width())
                          ? outline_box.height()
                          : outline_box.width();
    if (dimension > largest_dimension)
      largest_dimension = dimension;
  }

  if (num_outlines > 5) {
    // penalise LOTS of blobs
    largest_dimension *= 2;
  }

  TBOX blob_box = blob->bounding_box();
  if (blob_box.bottom() > kBlnBaselineOffset * 4 ||
      blob_box.top() < kBlnBaselineOffset / 2) {
    // Lax blob is if high or low
    largest_dimension /= 2;
  }

  return largest_dimension;
}
||||
} // namespace tesseract
|
||||
|
||||
// Debug dump of a single word: bounding box, text, blob counts, reject map
// and accept/done flags.  Set show_map_detail to TRUE locally to also dump
// the per-character reject map entries.
void fixspace_dbg(WERD_RES *word) {
  TBOX box = word->word->bounding_box();
  BOOL8 show_map_detail = FALSE;
  inT16 i;

  box.print();
  tprintf(" \"%s\" ", word->best_choice->unichar_string().string());
  tprintf("Blob count: %d (word); %d/%d (rebuild word)\n",
          word->word->cblob_list()->length(),
          word->rebuild_word->NumBlobs(),
          word->box_word->length());
  word->reject_map.print(debug_fp);
  tprintf("\n");
  if (show_map_detail) {
    tprintf("\"%s\"\n", word->best_choice->unichar_string().string());
    for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
      tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
      word->reject_map[i].full_print(debug_fp);
    }
  }

  tprintf("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");
  tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");
}
|
||||
|
||||
|
||||
/**
|
||||
* fp_eval_word_spacing()
|
||||
* Evaluation function for fixed pitch word lists.
|
||||
*
|
||||
* Basically, count the number of "nice" characters - those which are in tess
|
||||
* acceptable words or in dict words and are not rejected.
|
||||
* Penalise any potential noise chars
|
||||
*/
|
||||
namespace tesseract {
|
||||
// Scores a fixed-pitch word list: +1 for each accepted character inside a
// trusted word, -1 for each space character or noise-sized blob inside one.
// Untrusted words contribute nothing.  The result is clipped at zero.
inT16 Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
  WERD_RES_IT it(&word_res_list);
  inT16 total = 0;
  float noise_limit = kBlnXHeight * fixsp_small_outlines_size;

  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    WERD_RES *word = it.data();
    if (word->rebuild_word == NULL)
      continue;  // Can't handle cube words.
    // Only trusted words (done, accepted, dictionary or number permuter,
    // or a safe dictionary word) contribute to the score.
    bool trusted = word->done ||
                   word->tess_accepted ||
                   word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
                   word->best_choice->permuter() == FREQ_DAWG_PERM ||
                   word->best_choice->permuter() == USER_DAWG_PERM ||
                   safe_dict_word(word) > 0;
    if (!trusted)
      continue;

    int blob_count = word->rebuild_word->NumBlobs();
    UNICHAR_ID space_id = word->uch_set->unichar_to_id(" ");
    for (inT16 i = 0; i < word->best_choice->length() && i < blob_count;
         ++i) {
      TBLOB* blob = word->rebuild_word->blobs[i];
      if (word->best_choice->unichar_id(i) == space_id ||
          blob_noise_score(blob) < noise_limit) {
        total -= 1;  // penalise possibly erroneous non-space
      }
      else if (word->reject_map[i].accepted()) {
        total++;
      }
    }
  }
  return total < 0 ? 0 : total;
}
|
||||
|
||||
} // namespace tesseract
|
|
@ -0,0 +1,31 @@
|
|||
/******************************************************************
|
||||
* File: fixspace.h (Formerly fixspace.h)
|
||||
* Description: Implements a pass over the page res, exploring the alternative
|
||||
* spacing possibilities, trying to use context to improve the
|
||||
word spacing
|
||||
* Author: Phil Cheatle
|
||||
* Created: Thu Oct 21 11:38:43 BST 1993
|
||||
*
|
||||
* (C) Copyright 1993, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef FIXSPACE_H
|
||||
#define FIXSPACE_H
|
||||
|
||||
#include "pageres.h"
|
||||
#include "params.h"
|
||||
|
||||
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list);
|
||||
void transform_to_next_perm(WERD_RES_LIST &words);
|
||||
void fixspace_dbg(WERD_RES *word);
|
||||
#endif
|
|
@ -0,0 +1,216 @@
|
|||
/**********************************************************************
|
||||
* File: fixxht.cpp (Formerly fixxht.c)
|
||||
* Description: Improve x_ht and look out for case inconsistencies
|
||||
* Author: Phil Cheatle
|
||||
* Created: Thu Aug 5 14:11:08 BST 1993
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "params.h"
|
||||
#include "float2int.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Fixxht overview.
|
||||
// Premise: Initial estimate of x-height is adequate most of the time, but
|
||||
// occasionally it is incorrect. Most notable causes of failure are:
|
||||
// 1. Small caps, where the top of the caps is the same as the body text
|
||||
// xheight. For small caps words the xheight needs to be reduced to correctly
|
||||
// recognize the caps in the small caps word.
|
||||
// 2. All xheight lines, such as summer. Here the initial estimate will have
|
||||
// guessed that the blob tops are caps and will have placed the xheight too low.
|
||||
// 3. Noise/logos beside words, or changes in font size on a line. Such
|
||||
// things can blow the statistics and cause an incorrect estimate.
|
||||
// 4. Incorrect baseline. Can happen when 2 columns are incorrectly merged.
|
||||
// In this case the x-height is often still correct.
|
||||
//
|
||||
// Algorithm.
|
||||
// Compare the vertical position (top only) of alphnumerics in a word with
|
||||
// the range of positions in training data (in the unicharset).
|
||||
// See CountMisfitTops. If any characters disagree sufficiently with the
|
||||
// initial xheight estimate, then recalculate the xheight, re-run OCR on
|
||||
// the word, and if the number of vertical misfits goes down, along with
|
||||
// either the word rating or certainty, then keep the new xheight.
|
||||
// The new xheight is calculated as follows:ComputeCompatibleXHeight
|
||||
// For each alphanumeric character that has a vertically misplaced top
|
||||
// (a misfit), yet its bottom is within the acceptable range (ie it is not
|
||||
// likely a sub-or super-script) calculate the range of acceptable xheight
|
||||
// positions from its range of tops, and give each value in the range a
|
||||
// number of votes equal to the distance of its top from its acceptance range.
|
||||
// The x-height position with the median of the votes becomes the new
|
||||
// x-height. This assumes that most characters will be correctly recognized
|
||||
// even if the x-height is incorrect. This is not a terrible assumption, but
|
||||
// it is not great. An improvement would be to use a classifier that does
|
||||
// not care about vertical position or scaling at all.
|
||||
// Separately collect stats on shifted baselines and apply the same logic to
|
||||
// computing a best-fit shift to fix the error. If the baseline needs to be
|
||||
// shifted, but the x-height is OK, returns the original x-height along with
|
||||
// the baseline shift to indicate that recognition needs to re-run.
|
||||
|
||||
// If the max-min top of a unicharset char is bigger than kMaxCharTopRange
|
||||
// then the char top cannot be used to judge misfits or suggest a new top.
|
||||
const int kMaxCharTopRange = 48;
|
||||
|
||||
// Returns the number of misfit blob tops in this word.
|
||||
// Returns the number of misfit blob tops in this word: alphanumeric blobs
// whose top lies outside the trained top range (plus tolerance) for the
// character they were recognized as.
int Tesseract::CountMisfitTops(WERD_RES *word_res) {
  int misfit_count = 0;
  int blob_count = word_res->rebuild_word->NumBlobs();
  for (int b = 0; b < blob_count; ++b) {
    UNICHAR_ID class_id = word_res->best_choice->unichar_id(b);
    // Only alphanumerics have meaningful trained top/bottom statistics.
    if (!unicharset.get_isalpha(class_id) &&
        !unicharset.get_isdigit(class_id))
      continue;
    int top = word_res->rebuild_word->blobs[b]->bounding_box().top();
    // Clip to the limit of normalized feature space.
    if (top >= INT_FEAT_RANGE)
      top = INT_FEAT_RANGE - 1;
    int min_bottom, max_bottom, min_top, max_top;
    unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom,
                              &min_top, &max_top);
    // A wild trained top range says nothing about misfits; skip it.
    if (max_top - min_top > kMaxCharTopRange)
      continue;
    bool misfit = top < min_top - x_ht_acceptance_tolerance ||
                  top > max_top + x_ht_acceptance_tolerance;
    if (misfit)
      ++misfit_count;
    if (debug_x_ht_level >= 1) {
      tprintf("Class %s is %s with top %d vs limits of %d->%d, +/-%d\n",
              unicharset.id_to_unichar(class_id),
              misfit ? "Misfit" : "OK", top, min_top, max_top,
              static_cast<int>(x_ht_acceptance_tolerance));
    }
  }
  return misfit_count;
}
|
||||
|
||||
// Returns a new x-height maximally compatible with the result in word_res.
|
||||
// See comment above for overall algorithm.
|
||||
// Returns a new x-height (in pixel space) maximally compatible with the
// result in word_res, or 0.0f if no better x-height was found.  Also
// outputs *baseline_shift (pixel space) when the blob bottoms indicate the
// baseline itself is misplaced.  See the file-level comment for the
// voting algorithm.
float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res,
                                          float* baseline_shift) {
  STATS top_stats(0, MAX_UINT8);        // votes for x-height positions
  STATS shift_stats(-MAX_UINT8, MAX_UINT8);  // votes for baseline shifts
  int bottom_shift = 0;
  int num_blobs = word_res->rebuild_word->NumBlobs();
  // Repeat the voting after applying a bottom shift, until the shift stops
  // dominating the x-height votes.
  do {
    top_stats.clear();
    shift_stats.clear();
    for (int blob_id = 0; blob_id < num_blobs; ++blob_id) {
      TBLOB* blob = word_res->rebuild_word->blobs[blob_id];
      UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id);
      if (unicharset.get_isalpha(class_id) ||
          unicharset.get_isdigit(class_id)) {
        int top = blob->bounding_box().top() + bottom_shift;
        // Clip the top to the limit of normalized feature space.
        if (top >= INT_FEAT_RANGE)
          top = INT_FEAT_RANGE - 1;
        int bottom = blob->bounding_box().bottom() + bottom_shift;
        int min_bottom, max_bottom, min_top, max_top;
        unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom,
                                  &min_top, &max_top);
        // Chars with a wild top range would mess up the result so ignore them.
        if (max_top - min_top > kMaxCharTopRange)
          continue;
        // Distance of the actual top outside the tolerated trained range
        // (<= 0 means the top already fits).
        int misfit_dist = MAX((min_top - x_ht_acceptance_tolerance) - top,
                            top - (max_top + x_ht_acceptance_tolerance));
        int height = top - kBlnBaselineOffset;
        if (debug_x_ht_level >= 2) {
          tprintf("Class %s: height=%d, bottom=%d,%d top=%d,%d, actual=%d,%d: ",
                  unicharset.id_to_unichar(class_id),
                  height, min_bottom, max_bottom, min_top, max_top,
                  bottom, top);
        }
        // Use only chars that fit in the expected bottom range, and where
        // the range of tops is sensibly near the xheight.
        if (min_bottom <= bottom + x_ht_acceptance_tolerance &&
            bottom - x_ht_acceptance_tolerance <= max_bottom &&
            min_top > kBlnBaselineOffset &&
            max_top - kBlnBaselineOffset >= kBlnXHeight &&
            misfit_dist > 0) {
          // Compute the x-height position using proportionality between the
          // actual height and expected height.
          int min_xht = DivRounded(height * kBlnXHeight,
                                   max_top - kBlnBaselineOffset);
          int max_xht = DivRounded(height * kBlnXHeight,
                                   min_top - kBlnBaselineOffset);
          if (debug_x_ht_level >= 2) {
            tprintf(" xht range min=%d, max=%d\n", min_xht, max_xht);
          }
          // The range of expected heights gets a vote equal to the distance
          // of the actual top from the expected top.
          for (int y = min_xht; y <= max_xht; ++y)
            top_stats.add(y, misfit_dist);
        }
        else if ((min_bottom > bottom + x_ht_acceptance_tolerance ||
                  bottom - x_ht_acceptance_tolerance > max_bottom) &&
                 bottom_shift == 0) {
          // Get the range of required bottom shift.
          int min_shift = min_bottom - bottom;
          int max_shift = max_bottom - bottom;
          if (debug_x_ht_level >= 2) {
            tprintf(" bottom shift min=%d, max=%d\n", min_shift, max_shift);
          }
          // The range of expected shifts gets a vote equal to the min distance
          // of the actual bottom from the expected bottom, spread over the
          // range of its acceptance.
          int misfit_weight = abs(min_shift);
          if (max_shift > min_shift)
            misfit_weight /= max_shift - min_shift;
          for (int y = min_shift; y <= max_shift; ++y)
            shift_stats.add(y, misfit_weight);
        }
        else {
          if (bottom_shift == 0) {
            // Things with bottoms that are already ok need to say so, on the
            // 1st iteration only.
            shift_stats.add(0, kBlnBaselineOffset);
          }
          if (debug_x_ht_level >= 2) {
            tprintf(" already OK\n");
          }
        }
      }
    }
    // If the shift votes outweigh the x-height votes, apply the median
    // shift and re-vote.
    if (shift_stats.get_total() > top_stats.get_total()) {
      bottom_shift = IntCastRounded(shift_stats.median());
      if (debug_x_ht_level >= 2) {
        tprintf("Applying bottom shift=%d\n", bottom_shift);
      }
    }
  } while (bottom_shift != 0 &&
           top_stats.get_total() < shift_stats.get_total());
  // Baseline shift is opposite sign to the bottom shift.
  *baseline_shift = -bottom_shift / word_res->denorm.y_scale();
  if (debug_x_ht_level >= 2) {
    tprintf("baseline shift=%g\n", *baseline_shift);
  }
  if (top_stats.get_total() == 0)
    return bottom_shift != 0 ? word_res->x_height : 0.0f;
  // The new xheight is just the median vote, which is then scaled out
  // of BLN space back to pixel space to get the x-height in pixel space.
  float new_xht = top_stats.median();
  if (debug_x_ht_level >= 2) {
    tprintf("Median xht=%f\n", new_xht);
    tprintf("Mode20:A: New x-height = %f (norm), %f (orig)\n",
            new_xht, new_xht / word_res->denorm.y_scale());
  }
  // The xheight must change by at least x_ht_min_change to be used.
  if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change)
    return new_xht / word_res->denorm.y_scale();
  else
    return bottom_shift != 0 ? word_res->x_height : 0.0f;
}
|
||||
|
||||
} // namespace tesseract
|
|
@ -0,0 +1,390 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: ltrresultiterator.cpp
|
||||
// Description: Iterator for tesseract results in strict left-to-right
|
||||
// order that avoids using tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
// Created: Fri Feb 26 14:32:09 PST 2010
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "ltrresultiterator.h"
|
||||
|
||||
#include "allheaders.h"
|
||||
#include "pageres.h"
|
||||
#include "strngs.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Constructs an iterator over page_res; delegates all geometry/state to the
// PageIterator base and initializes both separators to "\n".
LTRResultIterator::LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract,
                                     int scale, int scaled_yres,
                                     int rect_left, int rect_top,
                                     int rect_width, int rect_height)
  : PageIterator(page_res, tesseract, scale, scaled_yres,
                 rect_left, rect_top, rect_width, rect_height),
    line_separator_("\n"),
    paragraph_separator_("\n") {
}
|
||||
|
||||
// Nothing to release; the base class owns all iterator state.
LTRResultIterator::~LTRResultIterator() {
}
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
// object at the given level. Use delete [] to free after use.
// For RIL_SYMBOL and RIL_WORD the text comes straight from the word's best
// choice. For line/para/block levels, a copy of the iterator walks forward
// word by word, joining words with spaces, lines with line_separator_ and
// paragraphs with paragraph_separator_, stopping at the level boundary.
char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
  if (it_->word() == NULL) return NULL;  // Already at the end!
  STRING text;
  // Work on a copy so the public iterator position is not disturbed.
  PAGE_RES_IT res_it(*it_);
  WERD_CHOICE* best_choice = res_it.word()->best_choice;
  ASSERT_HOST(best_choice != NULL);
  if (level == RIL_SYMBOL) {
    text = res_it.word()->BestUTF8(blob_index_, false);
  }
  else if (level == RIL_WORD) {
    text = best_choice->unichar_string();
  }
  else {
    bool eol = false;  // end of line?
    bool eop = false;  // end of paragraph?
    do {  // for each paragraph in a block
      do {  // for each text line in a paragraph
        do {  // for each word in a text line
          best_choice = res_it.word()->best_choice;
          ASSERT_HOST(best_choice != NULL);
          text += best_choice->unichar_string();
          text += " ";
          res_it.forward();
          // Row change after forward() marks the end of the text line.
          eol = res_it.row() != res_it.prev_row();
        } while (!eol);
        // Drop the trailing space added after the last word of the line.
        text.truncate_at(text.length() - 1);
        text += line_separator_;
        // Paragraph ends on a block change or a paragraph-pointer change.
        eop = res_it.block() != res_it.prev_block() ||
            res_it.row()->row->para() != res_it.prev_row()->row->para();
      } while (level != RIL_TEXTLINE && !eop);
      if (eop) text += paragraph_separator_;
    } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
  }
  // Copy into a caller-owned buffer (length includes the terminating NUL).
  int length = text.length() + 1;
  char* result = new char[length];
  strncpy(result, text.string(), length);
  return result;
}
|
||||
|
||||
// Set the string inserted at the end of each text line. "\n" by default.
// The pointer is stored as-is (not copied); it must outlive this iterator.
void LTRResultIterator::SetLineSeparator(const char *new_line) {
  line_separator_ = new_line;
}

// Set the string inserted at the end of each paragraph. "\n" by default.
// The pointer is stored as-is (not copied); it must outlive this iterator.
void LTRResultIterator::SetParagraphSeparator(const char *new_para) {
  paragraph_separator_ = new_para;
}
|
||||
|
||||
// Returns the mean confidence of the current object at the given level.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
// The mean word certainty over the spanned region is mapped through the
// affine transform 100 + 5 * certainty and clamped to [0, 100].
// NOTE(review): this assumes certainty values are <= 0 so the result lands
// in range before clamping — confirm against WERD_CHOICE::certainty().
float LTRResultIterator::Confidence(PageIteratorLevel level) const {
  if (it_->word() == NULL) return 0.0f;  // Already at the end!
  float mean_certainty = 0.0f;
  int certainty_count = 0;
  // Iterate on a copy so the public iterator position is not disturbed.
  PAGE_RES_IT res_it(*it_);
  WERD_CHOICE* best_choice = res_it.word()->best_choice;
  ASSERT_HOST(best_choice != NULL);
  switch (level) {
    case RIL_BLOCK:
      // Accumulate over every word until the block changes.
      do {
        best_choice = res_it.word()->best_choice;
        ASSERT_HOST(best_choice != NULL);
        mean_certainty += best_choice->certainty();
        ++certainty_count;
        res_it.forward();
      } while (res_it.block() == res_it.prev_block());
      break;
    case RIL_PARA:
      // Accumulate until the block or the paragraph pointer changes.
      do {
        best_choice = res_it.word()->best_choice;
        ASSERT_HOST(best_choice != NULL);
        mean_certainty += best_choice->certainty();
        ++certainty_count;
        res_it.forward();
      } while (res_it.block() == res_it.prev_block() &&
               res_it.row()->row->para() == res_it.prev_row()->row->para());
      break;
    case RIL_TEXTLINE:
      // Accumulate until the row changes.
      do {
        best_choice = res_it.word()->best_choice;
        ASSERT_HOST(best_choice != NULL);
        mean_certainty += best_choice->certainty();
        ++certainty_count;
        res_it.forward();
      } while (res_it.row() == res_it.prev_row());
      break;
    case RIL_WORD:
      mean_certainty += best_choice->certainty();
      ++certainty_count;
      break;
    case RIL_SYMBOL:
      // Certainty of the single blob at the current symbol index.
      mean_certainty += best_choice->certainty(blob_index_);
      ++certainty_count;
  }
  if (certainty_count > 0) {
    mean_certainty /= certainty_count;
    float confidence = 100 + 5 * mean_certainty;
    if (confidence < 0.0f) confidence = 0.0f;
    if (confidence > 100.0f) confidence = 100.0f;
    return confidence;
  }
  return 0.0f;
}
|
||||
|
||||
void LTRResultIterator::RowAttributes(float* row_height, float* descenders,
|
||||
float* ascenders) const {
|
||||
*row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() -
|
||||
it_->row()->row->descenders();
|
||||
*descenders = it_->row()->row->descenders();
|
||||
*ascenders = it_->row()->row->ascenders();
|
||||
}
|
||||
|
||||
// Returns the font attributes of the current word. If iterating at a higher
// level object than words, eg textlines, then this will return the
// attributes of the first word in that textline.
// The actual return value is a string representing a font name. It points
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
// the iterator itself, ie rendered invalid by various members of
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
// Pointsize is returned in printers points (1/72 inch.)
// Returns NULL (with *font_id = -1) when no font information is available;
// the other out-parameters are untouched in that case.
const char* LTRResultIterator::WordFontAttributes(bool* is_bold,
                                                  bool* is_italic,
                                                  bool* is_underlined,
                                                  bool* is_monospace,
                                                  bool* is_serif,
                                                  bool* is_smallcaps,
                                                  int* pointsize,
                                                  int* font_id) const {
  if (it_->word() == NULL) return NULL;  // Already at the end!
  if (it_->word()->fontinfo == NULL) {
    *font_id = -1;
    return NULL;  // No font information.
  }
  const FontInfo& font_info = *it_->word()->fontinfo;
  *font_id = font_info.universal_id;
  *is_bold = font_info.is_bold();
  *is_italic = font_info.is_italic();
  *is_underlined = false;  // TODO(rays) fix this!
  *is_monospace = font_info.is_fixed_pitch();
  *is_serif = font_info.is_serif();
  *is_smallcaps = it_->word()->small_caps;
  // Full row height in pixels, used to estimate the point size.
  float row_height = it_->row()->row->x_height() +
      it_->row()->row->ascenders() - it_->row()->row->descenders();
  // Convert from pixels to printers points (+0.5 rounds to nearest).
  *pointsize = scaled_yres_ > 0
      ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
      : 0;

  return font_info.name;
}
|
||||
|
||||
// Returns the name of the language used to recognize this word.
|
||||
const char* LTRResultIterator::WordRecognitionLanguage() const {
|
||||
if (it_->word() == NULL || it_->word()->tesseract == NULL) return NULL;
|
||||
return it_->word()->tesseract->lang.string();
|
||||
}
|
||||
|
||||
// Return the overall directionality of this word.
|
||||
StrongScriptDirection LTRResultIterator::WordDirection() const {
|
||||
if (it_->word() == NULL) return DIR_NEUTRAL;
|
||||
bool has_rtl = it_->word()->AnyRtlCharsInWord();
|
||||
bool has_ltr = it_->word()->AnyLtrCharsInWord();
|
||||
if (has_rtl && !has_ltr)
|
||||
return DIR_RIGHT_TO_LEFT;
|
||||
if (has_ltr && !has_rtl)
|
||||
return DIR_LEFT_TO_RIGHT;
|
||||
if (!has_ltr && !has_rtl)
|
||||
return DIR_NEUTRAL;
|
||||
return DIR_MIX;
|
||||
}
|
||||
|
||||
// Returns true if the current word was found in a dictionary.
|
||||
bool LTRResultIterator::WordIsFromDictionary() const {
|
||||
if (it_->word() == NULL) return false; // Already at the end!
|
||||
int permuter = it_->word()->best_choice->permuter();
|
||||
return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM ||
|
||||
permuter == USER_DAWG_PERM;
|
||||
}
|
||||
|
||||
// Returns true if the current word is numeric.
|
||||
bool LTRResultIterator::WordIsNumeric() const {
|
||||
if (it_->word() == NULL) return false; // Already at the end!
|
||||
int permuter = it_->word()->best_choice->permuter();
|
||||
return permuter == NUMBER_PERM;
|
||||
}
|
||||
|
||||
// Returns true if the word contains blamer information.
|
||||
bool LTRResultIterator::HasBlamerInfo() const {
|
||||
return it_->word() != NULL && it_->word()->blamer_bundle != NULL &&
|
||||
it_->word()->blamer_bundle->HasDebugInfo();
|
||||
}
|
||||
|
||||
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
|
||||
// of the current word.
|
||||
const void *LTRResultIterator::GetParamsTrainingBundle() const {
|
||||
return (it_->word() != NULL && it_->word()->blamer_bundle != NULL) ?
|
||||
&(it_->word()->blamer_bundle->params_training_bundle()) : NULL;
|
||||
}
|
||||
|
||||
// Returns the pointer to the string with blamer information for this word.
// Assumes that the word's blamer_bundle is not NULL (call HasBlamerInfo
// first); dereferences it unconditionally.
const char *LTRResultIterator::GetBlamerDebug() const {
  return it_->word()->blamer_bundle->debug().string();
}

// Returns the pointer to the string with misadaption information for this word.
// Assumes that the word's blamer_bundle is not NULL (call HasBlamerInfo
// first); dereferences it unconditionally.
const char *LTRResultIterator::GetBlamerMisadaptionDebug() const {
  return it_->word()->blamer_bundle->misadaption_debug().string();
}
|
||||
|
||||
// Returns true if a truth string was recorded for the current word.
|
||||
bool LTRResultIterator::HasTruthString() const {
|
||||
if (it_->word() == NULL) return false; // Already at the end!
|
||||
if (it_->word()->blamer_bundle == NULL ||
|
||||
it_->word()->blamer_bundle->NoTruth()) {
|
||||
return false; // no truth information for this word
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns true if the given string is equivalent to the truth string for
|
||||
// the current word.
|
||||
bool LTRResultIterator::EquivalentToTruth(const char *str) const {
|
||||
if (!HasTruthString()) return false;
|
||||
ASSERT_HOST(it_->word()->uch_set != NULL);
|
||||
WERD_CHOICE str_wd(str, *(it_->word()->uch_set));
|
||||
return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);
|
||||
}
|
||||
|
||||
// Returns the null terminated UTF-8 encoded truth string for the current word.
|
||||
// Use delete [] to free after use.
|
||||
char* LTRResultIterator::WordTruthUTF8Text() const {
|
||||
if (!HasTruthString()) return NULL;
|
||||
STRING truth_text = it_->word()->blamer_bundle->TruthString();
|
||||
int length = truth_text.length() + 1;
|
||||
char* result = new char[length];
|
||||
strncpy(result, truth_text.string(), length);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns the null terminated UTF-8 encoded normalized OCR string for the
// current word. Use delete [] to free after use.
// Builds the string one unichar at a time from the normalized form of each
// unichar id in the word's best choice.
char* LTRResultIterator::WordNormedUTF8Text() const {
  if (it_->word() == NULL) return NULL;  // Already at the end!
  STRING ocr_text;
  WERD_CHOICE* best_choice = it_->word()->best_choice;
  const UNICHARSET *unicharset = it_->word()->uch_set;
  ASSERT_HOST(best_choice != NULL);
  for (int i = 0; i < best_choice->length(); ++i) {
    ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
  }
  // Copy into a caller-owned buffer (length includes the terminating NUL).
  int length = ocr_text.length() + 1;
  char* result = new char[length];
  strncpy(result, ocr_text.string(), length);
  return result;
}
|
||||
|
||||
// Returns a pointer to serialized choice lattice.
|
||||
// Fills lattice_size with the number of bytes in lattice data.
|
||||
const char *LTRResultIterator::WordLattice(int *lattice_size) const {
|
||||
if (it_->word() == NULL) return NULL; // Already at the end!
|
||||
if (it_->word()->blamer_bundle == NULL) return NULL;
|
||||
*lattice_size = it_->word()->blamer_bundle->lattice_size();
|
||||
return it_->word()->blamer_bundle->lattice_data();
|
||||
}
|
||||
|
||||
// Returns true if the current symbol is a superscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool LTRResultIterator::SymbolIsSuperscript() const {
|
||||
if (cblob_it_ == NULL && it_->word() != NULL)
|
||||
return it_->word()->best_choice->BlobPosition(blob_index_) ==
|
||||
SP_SUPERSCRIPT;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true if the current symbol is a subscript.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool LTRResultIterator::SymbolIsSubscript() const {
|
||||
if (cblob_it_ == NULL && it_->word() != NULL)
|
||||
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true if the current symbol is a dropcap.
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool LTRResultIterator::SymbolIsDropcap() const {
|
||||
if (cblob_it_ == NULL && it_->word() != NULL)
|
||||
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Constructs a one-shot iterator over the classifier choices for the symbol
// that result_it currently points to. Requires result_it to be on a valid
// word. If the word has no ratings or the choice list is empty, the
// iterator is left empty (choice_it_ == NULL) and Next()/accessors degrade
// gracefully.
ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) {
  ASSERT_HOST(result_it.it_->word() != NULL);
  word_res_ = result_it.it_->word();
  BLOB_CHOICE_LIST* choices = NULL;
  if (word_res_->ratings != NULL)
    choices = word_res_->GetBlobChoices(result_it.blob_index_);
  if (choices != NULL && !choices->empty()) {
    choice_it_ = new BLOB_CHOICE_IT(choices);
    // Mark the start so cycled_list() can detect a full loop.
    choice_it_->mark_cycle_pt();
  }
  else {
    choice_it_ = NULL;
  }
}

// Owns only the list iterator; the choice list and word stay with the API.
ChoiceIterator::~ChoiceIterator() {
  delete choice_it_;
}
|
||||
|
||||
// Moves to the next choice for the symbol and returns false if there
|
||||
// are none left.
|
||||
bool ChoiceIterator::Next() {
|
||||
if (choice_it_ == NULL)
|
||||
return false;
|
||||
choice_it_->forward();
|
||||
return !choice_it_->cycled_list();
|
||||
}
|
||||
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// choice. Do NOT use delete [] to free after use.
|
||||
const char* ChoiceIterator::GetUTF8Text() const {
|
||||
if (choice_it_ == NULL)
|
||||
return NULL;
|
||||
UNICHAR_ID id = choice_it_->data()->unichar_id();
|
||||
return word_res_->uch_set->id_to_unichar_ext(id);
|
||||
}
|
||||
|
||||
// Returns the confidence of the current choice.
|
||||
// The number should be interpreted as a percent probability. (0.0f-100.0f)
|
||||
float ChoiceIterator::Confidence() const {
|
||||
if (choice_it_ == NULL)
|
||||
return 0.0f;
|
||||
float confidence = 100 + 5 * choice_it_->data()->certainty();
|
||||
if (confidence < 0.0f) confidence = 0.0f;
|
||||
if (confidence > 100.0f) confidence = 100.0f;
|
||||
return confidence;
|
||||
}
|
||||
|
||||
|
||||
} // namespace tesseract.
|
|
@ -0,0 +1,218 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: ltrresultiterator.h
|
||||
// Description: Iterator for tesseract results in strict left-to-right
|
||||
// order that avoids using tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
// Created: Fri Feb 26 11:01:06 PST 2010
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__
|
||||
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__
|
||||
|
||||
#include "platform.h"
|
||||
#include "pageiterator.h"
|
||||
#include "unichar.h"
|
||||
|
||||
class BLOB_CHOICE_IT;
|
||||
class WERD_RES;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Class to iterate over tesseract results, providing access to all levels
// of the page hierarchy, without including any tesseract headers or having
// to handle any tesseract structures.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES.
// See apitypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface.
// LTRResultIterator adds text-specific methods for access to OCR output.

class TESS_API LTRResultIterator : public PageIterator {
  friend class ChoiceIterator;
 public:
  // page_res and tesseract come directly from the BaseAPI.
  // The rectangle parameters are copied indirectly from the Thresholder,
  // via the BaseAPI. They represent the coordinates of some rectangle in an
  // original image (in top-left-origin coordinates) and therefore the top-left
  // needs to be added to any output boxes in order to specify coordinates
  // in the original image. See TessBaseAPI::SetRectangle.
  // The scale and scaled_yres are in case the Thresholder scaled the image
  // rectangle prior to thresholding. Any coordinates in tesseract's image
  // must be divided by scale before adding (rect_left, rect_top).
  // The scaled_yres indicates the effective resolution of the binary image
  // that tesseract has been given by the Thresholder.
  // After the constructor, Begin has already been called.
  LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract,
                    int scale, int scaled_yres,
                    int rect_left, int rect_top,
                    int rect_width, int rect_height);
  virtual ~LTRResultIterator();

  // LTRResultIterators may be copied! This makes it possible to iterate over
  // all the objects at a lower level, while maintaining an iterator to
  // objects at a higher level. These constructors DO NOT CALL Begin, so
  // iterations will continue from the location of src.
  // TODO: For now the copy constructor and operator= only need the base class
  // versions, but if new data members are added, don't forget to add them!

  // ============= Moving around within the page ============.

  // See PageIterator.

  // ============= Accessing data ==============.

  // Returns the null terminated UTF-8 encoded text string for the current
  // object at the given level. Use delete [] to free after use.
  char* GetUTF8Text(PageIteratorLevel level) const;

  // Set the string inserted at the end of each text line. "\n" by default.
  void SetLineSeparator(const char *new_line);

  // Set the string inserted at the end of each paragraph. "\n" by default.
  void SetParagraphSeparator(const char *new_para);

  // Returns the mean confidence of the current object at the given level.
  // The number should be interpreted as a percent probability. (0.0f-100.0f)
  float Confidence(PageIteratorLevel level) const;

  // Returns the attributes of the current row.
  void RowAttributes(float* row_height, float* descenders,
                     float* ascenders) const;

  // ============= Functions that refer to words only ============.

  // Returns the font attributes of the current word. If iterating at a higher
  // level object than words, eg textlines, then this will return the
  // attributes of the first word in that textline.
  // The actual return value is a string representing a font name. It points
  // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
  // the iterator itself, ie rendered invalid by various members of
  // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
  // Pointsize is returned in printers points (1/72 inch.)
  const char* WordFontAttributes(bool* is_bold,
                                 bool* is_italic,
                                 bool* is_underlined,
                                 bool* is_monospace,
                                 bool* is_serif,
                                 bool* is_smallcaps,
                                 int* pointsize,
                                 int* font_id) const;

  // Return the name of the language used to recognize this word.
  // On error, NULL. Do not delete this pointer.
  const char* WordRecognitionLanguage() const;

  // Return the overall directionality of this word.
  StrongScriptDirection WordDirection() const;

  // Returns true if the current word was found in a dictionary.
  bool WordIsFromDictionary() const;

  // Returns true if the current word is numeric.
  bool WordIsNumeric() const;

  // Returns true if the word contains blamer information.
  bool HasBlamerInfo() const;

  // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
  // of the current word.
  const void *GetParamsTrainingBundle() const;

  // Returns a pointer to the string with blamer information for this word.
  // Assumes that the word's blamer_bundle is not NULL.
  const char *GetBlamerDebug() const;

  // Returns a pointer to the string with misadaption information for this word.
  // Assumes that the word's blamer_bundle is not NULL.
  const char *GetBlamerMisadaptionDebug() const;

  // Returns true if a truth string was recorded for the current word.
  bool HasTruthString() const;

  // Returns true if the given string is equivalent to the truth string for
  // the current word.
  bool EquivalentToTruth(const char *str) const;

  // Returns a null terminated UTF-8 encoded truth string for the current word.
  // Use delete [] to free after use.
  char* WordTruthUTF8Text() const;

  // Returns a null terminated UTF-8 encoded normalized OCR string for the
  // current word. Use delete [] to free after use.
  char* WordNormedUTF8Text() const;

  // Returns a pointer to serialized choice lattice.
  // Fills lattice_size with the number of bytes in lattice data.
  const char *WordLattice(int *lattice_size) const;

  // ============= Functions that refer to symbols only ============.

  // Returns true if the current symbol is a superscript.
  // If iterating at a higher level object than symbols, eg words, then
  // this will return the attributes of the first symbol in that word.
  bool SymbolIsSuperscript() const;
  // Returns true if the current symbol is a subscript.
  // If iterating at a higher level object than symbols, eg words, then
  // this will return the attributes of the first symbol in that word.
  bool SymbolIsSubscript() const;
  // Returns true if the current symbol is a dropcap.
  // If iterating at a higher level object than symbols, eg words, then
  // this will return the attributes of the first symbol in that word.
  bool SymbolIsDropcap() const;

 protected:
  // String inserted at the end of each text line by GetUTF8Text ("\n" by
  // default). Borrowed pointer; not owned by the iterator.
  const char *line_separator_;
  // String inserted at the end of each paragraph by GetUTF8Text ("\n" by
  // default). Borrowed pointer; not owned by the iterator.
  const char *paragraph_separator_;
};
|
||||
|
||||
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
class ChoiceIterator {
 public:
  // Construction is from a LTRResultIterator that points to the symbol of
  // interest. The ChoiceIterator allows a one-shot iteration over the
  // choices for this symbol and after that it is useless.
  explicit ChoiceIterator(const LTRResultIterator& result_it);
  ~ChoiceIterator();

  // Moves to the next choice for the symbol and returns false if there
  // are none left.
  bool Next();

  // ============= Accessing data ==============.

  // Returns the null terminated UTF-8 encoded text string for the current
  // choice.
  // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
  // internal structure and should NOT be delete[]ed to free after use.
  const char* GetUTF8Text() const;

  // Returns the confidence of the current choice.
  // The number should be interpreted as a percent probability. (0.0f-100.0f)
  float Confidence() const;

 private:
  // Pointer to the WERD_RES object owned by the API.
  WERD_RES* word_res_;
  // Iterator over the blob choices; NULL when the symbol has no choices.
  BLOB_CHOICE_IT* choice_it_;
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__
|
|
@ -0,0 +1,38 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: mathfix.h
|
||||
// Description: Implement missing math functions
|
||||
// Author: zdenop
|
||||
// Created: Fri Feb 03 06:45:06 CET 2012
|
||||
//
|
||||
// (C) Copyright 2012, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef VS2008_INCLUDE_MATHFIX_H_
// BUG FIX: the guard previously defined VS2008_INCLUDE_MATHFIXT_H_ (note the
// stray 'T'), which does not match the #ifndef above, so the guard never
// engaged and a second inclusion redefined roundf -> compile error.
#define VS2008_INCLUDE_MATHFIX_H_

#ifndef _MSC_VER
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif

#include <math.h>
#include <float.h>  // for _isnan(), _finite() on VC++

// Pre-VS2013 compilers lack these C99 functions; emulate them.
#if _MSC_VER < 1800
#define isnan(x) _isnan(x)
#define isinf(x) (!_finite(x))
#define fmax max //VC++ does not implement all the provisions of C99 Standard
#define round(x) roundf(x)
// Round half away from zero, matching C99 roundf semantics.
inline float roundf(float num) { return num > 0 ? floorf(num + 0.5f) : ceilf(num - 0.5f); }
#endif

#endif  // VS2008_INCLUDE_MATHFIX_H_
|
|
@ -0,0 +1,64 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: mutableiterator.h
|
||||
// Description: Iterator for tesseract results providing access to
|
||||
// both high-level API and Tesseract internal data structures.
|
||||
// Author: David Eger
|
||||
// Created: Thu Feb 24 19:01:06 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H__
|
||||
#define TESSERACT_CCMAIN_MUTABLEITERATOR_H__
|
||||
|
||||
#include "resultiterator.h"
|
||||
|
||||
class BLOB_CHOICE_IT;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Tesseract;
|
||||
|
||||
// Class to iterate over tesseract results, providing access to all levels
// of the page hierarchy, without including any tesseract headers or having
// to handle any tesseract structures.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES.
// See apitypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface.
// ResultIterator adds text-specific methods for access to OCR output.
// MutableIterator adds access to internal data structures.

class MutableIterator : public ResultIterator {
 public:
  // See argument descriptions in ResultIterator()
  MutableIterator(PAGE_RES* page_res, Tesseract* tesseract,
                  int scale, int scaled_yres,
                  int rect_left, int rect_top,
                  int rect_width, int rect_height)
      : ResultIterator(
          LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left,
                            rect_top, rect_width, rect_height)) {}
  virtual ~MutableIterator() {}

  // See PageIterator and ResultIterator for most calls.

  // Return access to Tesseract internals. The returned pointer is owned by
  // the iterator/API; do not delete it.
  const PAGE_RES_IT *PageResIt() const { return it_; }
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H__
|
|
@ -0,0 +1,585 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: osdetect.cpp
|
||||
// Description: Orientation and script detection.
|
||||
// Author: Samuel Charron
|
||||
// Ranjith Unnikrishnan
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "osdetect.h"
|
||||
|
||||
#include "blobbox.h"
|
||||
#include "blread.h"
|
||||
#include "colfind.h"
|
||||
#include "fontinfo.h"
|
||||
#include "imagefind.h"
|
||||
#include "linefind.h"
|
||||
#include "oldlist.h"
|
||||
#include "qrsequence.h"
|
||||
#include "ratngs.h"
|
||||
#include "strngs.h"
|
||||
#include "tabvector.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "textord.h"
|
||||
|
||||
// Bounds on the number of characters sampled for orientation/script
// detection. NOTE(review): exact usage is in code beyond this chunk.
const int kMinCharactersToTry = 20;
const int kMaxCharactersToTry = 5 * kMinCharactersToTry;

// Blob filtering thresholds.
const float kSizeRatioToReject = 2.0;
const int kMinAcceptableBlobHeight = 10;

// Ratio by which the top script score must beat the runner-up to be
// accepted (see OSResults::update_best_script).
const float kScriptAcceptRatio = 1.3;

// Han-character fractions used to distinguish Korean/Japanese text.
// NOTE(review): consumers are beyond this chunk — confirm.
const float kHanRatioInKorean = 0.7;
const float kHanRatioInJapanese = 0.3;

const float kNonAmbiguousMargin = 1.0;

// General scripts
static const char* han_script = "Han";
static const char* latin_script = "Latin";
static const char* katakana_script = "Katakana";
static const char* hiragana_script = "Hiragana";
static const char* hangul_script = "Hangul";

// Pseudo-scripts Name
const char* ScriptDetector::korean_script_ = "Korean";
const char* ScriptDetector::japanese_script_ = "Japanese";
const char* ScriptDetector::fraktur_script_ = "Fraktur";

// Minimum believable resolution.
const int kMinCredibleResolution = 70;
|
||||
|
||||
void OSResults::update_best_orientation() {
|
||||
float first = orientations[0];
|
||||
float second = orientations[1];
|
||||
best_result.orientation_id = 0;
|
||||
if (orientations[0] < orientations[1]) {
|
||||
first = orientations[1];
|
||||
second = orientations[0];
|
||||
best_result.orientation_id = 1;
|
||||
}
|
||||
for (int i = 2; i < 4; ++i) {
|
||||
if (orientations[i] > first) {
|
||||
second = first;
|
||||
first = orientations[i];
|
||||
best_result.orientation_id = i;
|
||||
}
|
||||
else if (orientations[i] > second) {
|
||||
second = orientations[i];
|
||||
}
|
||||
}
|
||||
// Store difference of top two orientation scores.
|
||||
best_result.oconfidence = first - second;
|
||||
}
|
||||
|
||||
// Force the best orientation to the given id, overriding whatever
// update_best_orientation() computed from the accumulated scores.
void OSResults::set_best_orientation(int orientation_id) {
  best_result.orientation_id = orientation_id;
  // Zero confidence: this result was imposed, not derived from scores.
  best_result.oconfidence = 0;
}
|
||||
|
||||
void OSResults::update_best_script(int orientation) {
|
||||
// We skip index 0 to ignore the "Common" script.
|
||||
float first = scripts_na[orientation][1];
|
||||
float second = scripts_na[orientation][2];
|
||||
best_result.script_id = 1;
|
||||
if (scripts_na[orientation][1] < scripts_na[orientation][2]) {
|
||||
first = scripts_na[orientation][2];
|
||||
second = scripts_na[orientation][1];
|
||||
best_result.script_id = 2;
|
||||
}
|
||||
for (int i = 3; i < kMaxNumberOfScripts; ++i) {
|
||||
if (scripts_na[orientation][i] > first) {
|
||||
best_result.script_id = i;
|
||||
second = first;
|
||||
first = scripts_na[orientation][i];
|
||||
}
|
||||
else if (scripts_na[orientation][i] > second) {
|
||||
second = scripts_na[orientation][i];
|
||||
}
|
||||
}
|
||||
best_result.sconfidence =
|
||||
(first / second - 1.0) / (kScriptAcceptRatio - 1.0);
|
||||
}
|
||||
|
||||
int OSResults::get_best_script(int orientation_id) const {
|
||||
int max_id = -1;
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
|
||||
const char *script = unicharset->get_script_from_script_id(j);
|
||||
if (strcmp(script, "Common") && strcmp(script, "NULL")) {
|
||||
if (max_id == -1 ||
|
||||
scripts_na[orientation_id][j] > scripts_na[orientation_id][max_id])
|
||||
max_id = j;
|
||||
}
|
||||
}
|
||||
return max_id;
|
||||
}
|
||||
|
||||
// Print the script scores for all possible orientations.
|
||||
void OSResults::print_scores(void) const {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
tprintf("Orientation id #%d", i);
|
||||
print_scores(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Print the script scores for the given candidate orientation.
|
||||
// Print the script scores for the given candidate orientation.
// Only scripts with a non-zero score are listed.
void OSResults::print_scores(int orientation_id) const {
  for (int j = 0; j < kMaxNumberOfScripts; ++j) {
    if (scripts_na[orientation_id][j]) {
      tprintf("%12s\t: %f\n", unicharset->get_script_from_script_id(j),
              scripts_na[orientation_id][j]);
    }
  }
}
|
||||
|
||||
// Accumulate scores with given OSResults instance and update the best script.
|
||||
void OSResults::accumulate(const OSResults& osr) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
orientations[i] += osr.orientations[i];
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j)
|
||||
scripts_na[i][j] += osr.scripts_na[i][j];
|
||||
}
|
||||
unicharset = osr.unicharset;
|
||||
update_best_orientation();
|
||||
update_best_script(best_result.orientation_id);
|
||||
}
|
||||
|
||||
// Detect and erase horizontal/vertical lines and picture regions from the
|
||||
// image, so that non-text blobs are removed from consideration.
|
||||
// Detect and erase horizontal/vertical lines and picture regions from the
// image, so that non-text blobs are removed from consideration, then find
// the remaining connected components into blocks/to_blocks.
void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks,
                            TO_BLOCK_LIST *to_blocks) {
  Pix *pix = tess->pix_binary();
  ASSERT_HOST(pix != NULL);
  int vertical_x = 0;
  int vertical_y = 1;
  tesseract::TabVector_LIST v_lines;
  tesseract::TabVector_LIST h_lines;
  int resolution;
  // Clamp implausibly low reported resolutions to a credible minimum.
  if (kMinCredibleResolution > pixGetXRes(pix)) {
    resolution = kMinCredibleResolution;
    tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n",
            pixGetXRes(pix), resolution);
  } else {
    resolution = pixGetXRes(pix);
  }

  // Erase ruling lines in-place from pix.
  tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix,
                                            &vertical_x, &vertical_y,
                                            NULL, &v_lines, &h_lines);
  // Subtract detected picture regions from the binary image.
  Pix* im_pix = tesseract::ImageFind::FindImages(pix);
  if (im_pix != NULL) {
    pixSubtract(pix, pix, im_pix);
    pixDestroy(&im_pix);
  }
  tess->mutable_textord()->find_components(tess->pix_binary(),
                                           blocks, to_blocks);
}
|
||||
|
||||
// Find connected components in the page and process a subset until finished or
|
||||
// a stopping criterion is met.
|
||||
// Returns the number of blobs used in making the estimate. 0 implies failure.
|
||||
// Find connected components in the page and process a subset until finished
// or a stopping criterion is met.
// |filename| names the page image; its extension is stripped and the result
// passed to read_unlv_file (presumably to locate a zone/layout file — TODO
// confirm); if that fails the whole page becomes one block.
// Results are written into |osr|.
// Returns the number of blobs used in making the estimate. 0 implies failure.
int orientation_and_script_detection(STRING& filename,
                                     OSResults* osr,
                                     tesseract::Tesseract* tess) {
  STRING name = filename;        // filename with extension stripped below
  const char *lastdot;           // position of last '.' in name
  TBOX page_box;

  lastdot = strrchr(name.string(), '.');
  if (lastdot != NULL)
    name[lastdot - name.string()] = '\0';

  ASSERT_HOST(tess->pix_binary() != NULL);
  int width = pixGetWidth(tess->pix_binary());
  int height = pixGetHeight(tess->pix_binary());

  BLOCK_LIST blocks;
  if (!read_unlv_file(name, width, height, &blocks))
    FullPageBlock(width, height, &blocks);

  // Try to remove non-text regions from consideration.
  // (Fix: dropped the unused local 'land_blocks' the original declared here.)
  TO_BLOCK_LIST port_blocks;
  remove_nontext_regions(tess, &blocks, &port_blocks);

  if (port_blocks.empty()) {
    // Page segmentation did not succeed, so we need to find_components first.
    tess->mutable_textord()->find_components(tess->pix_binary(),
                                             &blocks, &port_blocks);
  } else {
    page_box.set_left(0);
    page_box.set_bottom(0);
    page_box.set_right(width);
    page_box.set_top(height);
    // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
    tess->mutable_textord()->filter_blobs(page_box.topright(),
                                          &port_blocks, true);
  }

  return os_detect(&port_blocks, osr, tess);
}
|
||||
|
||||
// Filter and sample the blobs.
|
||||
// Returns a non-zero number of blobs if the page was successfully processed, or
|
||||
// zero if the page had too few characters to be reliable
|
||||
// Filter and sample the blobs.
// Walks every text block, rejects blobs whose aspect ratio is too extreme
// (ambiguous under rotation) or that are too short, and hands the survivors
// to os_detect_blobs.
// Returns a non-zero number of blobs if the page was successfully processed,
// or zero if the page had too few characters to be reliable.
int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr,
              tesseract::Tesseract* tess) {
  int blobs_total = 0;
  TO_BLOCK_IT block_it;
  block_it.set_to_list(port_blocks);

  BLOBNBOX_CLIST filtered_list;
  BLOBNBOX_C_IT filtered_it(&filtered_list);

  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
       block_it.forward()) {
    TO_BLOCK* to_block = block_it.data();
    // Skip non-text blocks entirely.
    if (to_block->block->poly_block() &&
        !to_block->block->poly_block()->IsText()) continue;
    BLOBNBOX_IT bbox_it;
    bbox_it.set_to_list(&to_block->blobs);
    for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list();
         bbox_it.forward()) {
      BLOBNBOX* bbox = bbox_it.data();
      C_BLOB* blob = bbox->cblob();
      TBOX box = blob->bounding_box();
      ++blobs_total;

      // NOTE(review): a zero-width box would divide by zero here —
      // presumably such blobs cannot reach this point; verify upstream.
      float y_x = fabs((box.height() * 1.0) / box.width());
      float x_y = 1.0f / y_x;
      // Select a >= 1.0 ratio
      float ratio = x_y > y_x ? x_y : y_x;
      // Blob is ambiguous
      if (ratio > kSizeRatioToReject) continue;
      if (box.height() < kMinAcceptableBlobHeight) continue;
      filtered_it.add_to_end(bbox);
    }
  }
  return os_detect_blobs(NULL, &filtered_list, osr, tess);
}
|
||||
|
||||
// Detect orientation and script from a list of blobs.
|
||||
// Returns a non-zero number of blobs if the list was successfully processed, or
|
||||
// zero if the list had too few characters to be reliable.
|
||||
// If allowed_scripts is non-null and non-empty, it is a list of scripts that
|
||||
// constrains both orientation and script detection to consider only scripts
|
||||
// from the list.
|
||||
// Detect orientation and script from a list of blobs.
// Returns a non-zero number of blobs if the list was successfully processed,
// or zero if the list had too few characters to be reliable.
// If allowed_scripts is non-null and non-empty, it is a list of scripts that
// constrains both orientation and script detection to consider only scripts
// from the list. |osr| may be NULL, in which case a local instance is used
// and the results are discarded.
int os_detect_blobs(const GenericVector<int>* allowed_scripts,
                    BLOBNBOX_CLIST* blob_list, OSResults* osr,
                    tesseract::Tesseract* tess) {
  OSResults osr_;
  if (osr == NULL)
    osr = &osr_;

  osr->unicharset = &tess->unicharset;
  OrientationDetector o(allowed_scripts, osr);
  ScriptDetector s(allowed_scripts, osr, tess);

  BLOBNBOX_C_IT filtered_it(blob_list);
  int real_max = MIN(filtered_it.length(), kMaxCharactersToTry);
  // tprintf("Total blobs found = %d\n", blobs_total);
  // tprintf("Number of blobs post-filtering = %d\n", filtered_it.length());
  // tprintf("Number of blobs to try = %d\n", real_max);

  // If there are too few characters, skip this page entirely.
  if (real_max < kMinCharactersToTry / 2) {
    tprintf("Too few characters. Skipping this page\n");
    return 0;
  }

  // Flatten the list into an array for random-access sampling.
  BLOBNBOX** blobs = new BLOBNBOX*[filtered_it.length()];
  int number_of_blobs = 0;
  for (filtered_it.mark_cycle_pt(); !filtered_it.cycled_list();
       filtered_it.forward()) {
    blobs[number_of_blobs++] = (BLOBNBOX*)filtered_it.data();
  }
  // Visit blobs in the order given by the sequence generator (presumably a
  // quasi-random order spreading samples over the page — TODO confirm).
  QRSequenceGenerator sequence(number_of_blobs);
  int num_blobs_evaluated = 0;
  for (int i = 0; i < real_max; ++i) {
    // Stop early once the detectors are confident, but only after at least
    // kMinCharactersToTry blobs have been seen.
    if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess)
        && i > kMinCharactersToTry) {
      break;
    }
    ++num_blobs_evaluated;
  }
  delete[] blobs;

  // Make sure the best_result is up-to-date
  int orientation = o.get_orientation();
  osr->update_best_script(orientation);
  return num_blobs_evaluated;
}
|
||||
|
||||
// Processes a single blob to estimate script and orientation.
|
||||
// Return true if estimate of orientation and script satisfies stopping
|
||||
// criteria.
|
||||
// Processes a single blob to estimate script and orientation.
// Classifies the blob in all 4 rotations and feeds the resulting rating
// lists to the orientation and script detectors.
// Return true if estimate of orientation and script satisfies stopping
// criteria.
bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
                    ScriptDetector* s, OSResults* osr,
                    tesseract::Tesseract* tess) {
  tess->tess_cn_matching.set_value(true); // turn it on
  tess->tess_bn_matching.set_value(false);
  C_BLOB* blob = bbox->cblob();
  TBLOB* tblob = TBLOB::PolygonalCopy(tess->poly_allow_detailed_fx, blob);
  TBOX box = tblob->bounding_box();
  FCOORD current_rotation(1.0f, 0.0f);  // identity; advanced 90° per pass
  FCOORD rotation90(0.0f, 1.0f);
  BLOB_CHOICE_LIST ratings[4];          // one rating list per orientation
  // Test the 4 orientations
  for (int i = 0; i < 4; ++i) {
    // Normalize the blob. Set the origin to the place we want to be the
    // bottom-middle after rotation.
    // Scaling is to make the rotated height the x-height.
    float scaling = static_cast<float>(kBlnXHeight) / box.height();
    float x_origin = (box.left() + box.right()) / 2.0f;
    float y_origin = (box.bottom() + box.top()) / 2.0f;
    if (i == 0 || i == 2) {
      // Rotation is 0 or 180.
      y_origin = i == 0 ? box.bottom() : box.top();
    } else {
      // Rotation is 90 or 270.
      scaling = static_cast<float>(kBlnXHeight) / box.width();
      x_origin = i == 1 ? box.left() : box.right();
    }
    // Work on a copy so each orientation starts from the original outline.
    TBLOB* rotated_blob = new TBLOB(*tblob);
    rotated_blob->Normalize(NULL, &current_rotation, NULL,
                            x_origin, y_origin, scaling, scaling,
                            0.0f, static_cast<float>(kBlnBaselineOffset),
                            false, NULL);
    tess->AdaptiveClassifier(rotated_blob, ratings + i);
    delete rotated_blob;
    current_rotation.rotate(rotation90);
  }
  delete tblob;

  bool stop = o->detect_blob(ratings);
  s->detect_blob(ratings);
  int orientation = o->get_orientation();
  // Stop only when both detectors agree detection is settled.
  stop = s->must_stop(orientation) && stop;
  return stop;
}
|
||||
|
||||
|
||||
// Builds a detector that accumulates orientation evidence into |osr|,
// optionally restricted to the scripts in |allowed_scripts|.
// Both pointers are borrowed; the caller retains ownership.
OrientationDetector::OrientationDetector(
    const GenericVector<int>* allowed_scripts, OSResults* osr)
    : osr_(osr), allowed_scripts_(allowed_scripts) {}
|
||||
|
||||
// Score the given blob and return true if it is now sure of the orientation
|
||||
// after adding this block.
|
||||
// Score the given blob and return true if it is now sure of the orientation
// after adding this block. (Currently it never signals early exit and
// always returns false — see the TODO at the bottom.)
// |scores| is an array of 4 rating lists, one per orientation.
bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
  float blob_o_score[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
  float total_blob_o_score = 0.0f;

  for (int i = 0; i < 4; ++i) {
    BLOB_CHOICE_IT choice_it(scores + i);
    if (!choice_it.empty()) {
      BLOB_CHOICE* choice = NULL;
      if (allowed_scripts_ != NULL && !allowed_scripts_->empty()) {
        // Find the top choice in an allowed script.
        for (choice_it.mark_cycle_pt(); !choice_it.cycled_list() &&
             choice == NULL; choice_it.forward()) {
          int choice_script = choice_it.data()->script_id();
          int s = 0;
          for (s = 0; s < allowed_scripts_->size(); ++s) {
            if ((*allowed_scripts_)[s] == choice_script) {
              choice = choice_it.data();
              break;
            }
          }
        }
      } else {
        // No script restriction: take the top choice outright.
        choice = choice_it.data();
      }
      if (choice != NULL) {
        // The certainty score ranges between [-20,0]. This is converted here to
        // [0,1], with 1 indicating best match.
        blob_o_score[i] = 1 + 0.05 * choice->certainty();
        total_blob_o_score += blob_o_score[i];
      }
    }
  }
  // No orientation produced any usable choice: nothing to accumulate.
  if (total_blob_o_score == 0.0) return false;
  // Fill in any blanks with the worst score of the others. This is better than
  // picking an arbitrary probability for it and way better than -inf.
  float worst_score = 0.0f;
  int num_good_scores = 0;
  for (int i = 0; i < 4; ++i) {
    if (blob_o_score[i] > 0.0f) {
      ++num_good_scores;
      if (worst_score == 0.0f || blob_o_score[i] < worst_score)
        worst_score = blob_o_score[i];
    }
  }
  if (num_good_scores == 1) {
    // Lower worst if there is only one.
    worst_score /= 2.0f;
  }
  for (int i = 0; i < 4; ++i) {
    if (blob_o_score[i] == 0.0f) {
      blob_o_score[i] = worst_score;
      total_blob_o_score += worst_score;
    }
  }
  // Normalize the orientation scores for the blob and use them to
  // update the aggregated orientation score (sum of log-probabilities).
  for (int i = 0; total_blob_o_score != 0 && i < 4; ++i) {
    osr_->orientations[i] += log(blob_o_score[i] / total_blob_o_score);
  }

  // TODO(ranjith) Add an early exit test, based on min_orientation_margin,
  // as used in pagesegmain.cpp.
  return false;
}
|
||||
|
||||
// Recompute the best orientation from the accumulated scores and return
// its id [0..3].
int OrientationDetector::get_orientation() {
  osr_->update_best_orientation();
  return osr_->best_result.orientation_id;
}
|
||||
|
||||
|
||||
// Builds a detector that accumulates script evidence into |osr|, optionally
// restricted to |allowed_scripts|. All pointers are borrowed.
// Script names are resolved to ids up-front so per-blob scoring can compare
// integer ids instead of strings (add_script presumably returns the existing
// id when the script is already known — TODO confirm).
ScriptDetector::ScriptDetector(const GenericVector<int>* allowed_scripts,
                               OSResults* osr, tesseract::Tesseract* tess) {
  osr_ = osr;
  tess_ = tess;
  allowed_scripts_ = allowed_scripts;
  katakana_id_ = tess_->unicharset.add_script(katakana_script);
  hiragana_id_ = tess_->unicharset.add_script(hiragana_script);
  han_id_ = tess_->unicharset.add_script(han_script);
  hangul_id_ = tess_->unicharset.add_script(hangul_script);
  // Pseudo-script ids used to aggregate evidence for Japanese/Korean.
  japanese_id_ = tess_->unicharset.add_script(japanese_script_);
  korean_id_ = tess_->unicharset.add_script(korean_script_);
  latin_id_ = tess_->unicharset.add_script(latin_script);
  fraktur_id_ = tess_->unicharset.add_script(fraktur_script_);
}
|
||||
|
||||
|
||||
// Score the given blob and return true if it is now sure of the script after
|
||||
// adding this blob.
|
||||
// Score the given blob's ratings for each of the 4 orientations.
// For each orientation, the blob votes for a script only when its top
// choices are unambiguous (exactly one script within kNonAmbiguousMargin of
// the best certainty). Katakana/Hiragana/Hangul/Han votes are also folded
// into the Japanese/Korean pseudo-scripts, and Fraktur fonts are re-routed
// from Latin to the Fraktur pseudo-script.
void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
  bool done[kMaxNumberOfScripts];
  for (int i = 0; i < 4; ++i) {
    // Reset the per-orientation "script seen" markers.
    for (int j = 0; j < kMaxNumberOfScripts; ++j)
      done[j] = false;

    BLOB_CHOICE_IT choice_it;
    choice_it.set_to_list(scores + i);

    float prev_score = -1;        // negated certainty of the first match
    int script_count = 0;         // scripts within the ambiguity margin
    int prev_id = -1;             // script id of the first match
    int prev_fontinfo_id = -1;    // font of the first match (for Fraktur)
    const char* prev_unichar = "";
    const char* unichar = "";

    for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
         choice_it.forward()) {
      BLOB_CHOICE* choice = choice_it.data();
      int id = choice->script_id();
      if (allowed_scripts_ != NULL && !allowed_scripts_->empty()) {
        // Check that the choice is in an allowed script.
        int s = 0;
        for (s = 0; s < allowed_scripts_->size(); ++s) {
          if ((*allowed_scripts_)[s] == id) break;
        }
        if (s == allowed_scripts_->size()) continue;  // Not found in list.
      }
      // Script already processed before.
      // NOTE(review): assumes id < kMaxNumberOfScripts — verify add_script
      // cannot exceed the table size.
      if (done[id]) continue;
      done[id] = true;

      unichar = tess_->unicharset.id_to_unichar(choice->unichar_id());
      // Save data from the first match
      if (prev_score < 0) {
        prev_score = -choice->certainty();
        script_count = 1;
        prev_id = id;
        prev_unichar = unichar;
        prev_fontinfo_id = choice->fontinfo_id();
      } else if (-choice->certainty() < prev_score + kNonAmbiguousMargin) {
        // A second script is close enough in certainty: ambiguous.
        ++script_count;
      }

      // Digits are shared across scripts: stop scanning once the top match
      // is a single character and this choice is a digit.
      if (strlen(prev_unichar) == 1)
        if (unichar[0] >= '0' && unichar[0] <= '9')
          break;

      // if script_count is >= 2, character is ambiguous, skip other matches
      // since they are useless.
      if (script_count >= 2)
        break;
    }
    // Character is non ambiguous
    if (script_count == 1) {
      // Update the score of the winning script
      osr_->scripts_na[i][prev_id] += 1.0;

      // Workaround for Fraktur: a Latin hit in a fraktur font counts for the
      // Fraktur pseudo-script instead.
      if (prev_id == latin_id_) {
        if (prev_fontinfo_id >= 0) {
          const tesseract::FontInfo &fi =
              tess_->get_fontinfo_table().get(prev_fontinfo_id);
          //printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name,
          //       fi.is_italic(), fi.is_bold(), fi.is_fixed_pitch(),
          //       fi.is_serif(), fi.is_fraktur(),
          //       prev_unichar);
          if (fi.is_fraktur()) {
            osr_->scripts_na[i][prev_id] -= 1.0;
            osr_->scripts_na[i][fraktur_id_] += 1.0;
          }
        }
      }

      // Update Japanese / Korean pseudo-scripts
      if (prev_id == katakana_id_)
        osr_->scripts_na[i][japanese_id_] += 1.0;
      if (prev_id == hiragana_id_)
        osr_->scripts_na[i][japanese_id_] += 1.0;
      if (prev_id == hangul_id_)
        osr_->scripts_na[i][korean_id_] += 1.0;
      if (prev_id == han_id_) {
        // Han characters occur in both languages; split the vote.
        osr_->scripts_na[i][korean_id_] += kHanRatioInKorean;
        osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese;
      }
    }
  }  // iterate over each orientation
}
|
||||
|
||||
// Refresh the best script for the given orientation and report whether the
// script confidence is high enough (> 1) to stop detection early.
bool ScriptDetector::must_stop(int orientation) {
  osr_->update_best_script(orientation);
  return osr_->best_result.sconfidence > 1;
}
|
||||
|
||||
// Helper method to convert an orientation index to its value in degrees.
|
||||
// The value represents the amount of clockwise rotation in degrees that must be
|
||||
// applied for the text to be upright (readable).
|
||||
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must
// be applied for the text to be upright (readable).
// Returns -1 for an id outside [0, 3].
int OrientationIdToValue(const int& id) {
  static const int kRotationDegrees[4] = { 0, 270, 180, 90 };
  if (id < 0 || id > 3)
    return -1;
  return kRotationDegrees[id];
}
|
|
@ -0,0 +1,138 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: osdetect.h
|
||||
// Description: Orientation and script detection.
|
||||
// Author: Samuel Charron
|
||||
// Ranjith Unnikrishnan
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_OSDETECT_H__
|
||||
#define TESSERACT_CCMAIN_OSDETECT_H__
|
||||
|
||||
#include "strngs.h"
|
||||
#include "unicharset.h"
|
||||
|
||||
class TO_BLOCK_LIST;
|
||||
class BLOBNBOX;
|
||||
class BLOB_CHOICE_LIST;
|
||||
class BLOBNBOX_CLIST;
|
||||
|
||||
namespace tesseract {
|
||||
class Tesseract;
|
||||
}
|
||||
|
||||
// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur
|
||||
const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
|
||||
|
||||
// Holds the single best orientation/script hypothesis together with the
// confidence of each choice.
struct OSBestResult {
  OSBestResult() : orientation_id(0), script_id(0), sconfidence(0.0),
                   oconfidence(0.0) {}
  int orientation_id;   // Winning orientation, index [0..3].
  int script_id;        // Winning script index.
  float sconfidence;    // Script confidence; > 1 is treated as decisive.
  float oconfidence;    // Orientation confidence: gap between top two scores.
};
|
||||
|
||||
// Accumulates orientation and script scores across sampled blobs and
// derives the best orientation/script estimates from them.
struct OSResults {
  OSResults() : unicharset(NULL) {
    // Start all orientation and per-script scores at zero.
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < kMaxNumberOfScripts; ++j)
        scripts_na[i][j] = 0;
      orientations[i] = 0;
    }
  }
  // Recompute best_result's orientation from the accumulated scores.
  void update_best_orientation();
  // Set the estimate of the orientation to the given id.
  void set_best_orientation(int orientation_id);
  // Update/Compute the best estimate of the script assuming the given
  // orientation id.
  void update_best_script(int orientation_id);
  // Return the index of the script with the highest score for this orientation.
  TESS_API int get_best_script(int orientation_id) const;
  // Accumulate scores with given OSResults instance and update the best script.
  void accumulate(const OSResults& osr);

  // Print statistics.
  void print_scores(void) const;
  void print_scores(int orientation_id) const;

  // Array holding scores for each orientation id [0,3].
  // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
  // page respectively, where the values refer to the amount of clockwise
  // rotation to be applied to the page for the text to be upright and readable.
  float orientations[4];
  // Script confidence scores for each of 4 possible orientations.
  float scripts_na[4][kMaxNumberOfScripts];

  UNICHARSET* unicharset;   // Borrowed, not owned.
  OSBestResult best_result; // Current best orientation/script hypothesis.
};
|
||||
|
||||
// Accumulates per-blob classifier evidence for each of the four page
// orientations into an OSResults instance.
class OrientationDetector {
 public:
  // Neither pointer is owned; both must outlive the detector.
  OrientationDetector(const GenericVector<int>* allowed_scripts,
                      OSResults* results);
  // Score one blob's 4-orientation rating lists. Returns true when the
  // detector is sure enough to stop (the current implementation always
  // returns false).
  bool detect_blob(BLOB_CHOICE_LIST* scores);
  // Recompute and return the current best orientation id [0..3].
  int get_orientation();
 private:
  OSResults* osr_;                             // Not owned.
  const GenericVector<int>* allowed_scripts_;  // Not owned; may be NULL.
};
|
||||
|
||||
// Accumulates per-blob script evidence (including the Japanese/Korean/
// Fraktur pseudo-scripts) into an OSResults instance.
class ScriptDetector {
 public:
  // None of the pointers are owned; all must outlive the detector.
  ScriptDetector(const GenericVector<int>* allowed_scripts,
                 OSResults* osr, tesseract::Tesseract* tess);
  // Score one blob's 4-orientation rating lists.
  void detect_blob(BLOB_CHOICE_LIST* scores);
  // True when the script confidence for the given orientation is decisive.
  bool must_stop(int orientation);
 private:
  OSResults* osr_;  // Not owned.
  // Names of the pseudo-scripts used for aggregated evidence.
  static const char* korean_script_;
  static const char* japanese_script_;
  static const char* fraktur_script_;
  // Script ids resolved once in the constructor.
  int korean_id_;
  int japanese_id_;
  int katakana_id_;
  int hiragana_id_;
  int han_id_;
  int hangul_id_;
  int latin_id_;
  int fraktur_id_;
  tesseract::Tesseract* tess_;                 // Not owned.
  const GenericVector<int>* allowed_scripts_;  // Not owned; may be NULL.
};
|
||||
|
||||
int orientation_and_script_detection(STRING& filename,
|
||||
OSResults*,
|
||||
tesseract::Tesseract*);
|
||||
|
||||
int os_detect(TO_BLOCK_LIST* port_blocks,
|
||||
OSResults* osr,
|
||||
tesseract::Tesseract* tess);
|
||||
|
||||
int os_detect_blobs(const GenericVector<int>* allowed_scripts,
|
||||
BLOBNBOX_CLIST* blob_list,
|
||||
OSResults* osr,
|
||||
tesseract::Tesseract* tess);
|
||||
|
||||
bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
|
||||
ScriptDetector* s, OSResults*,
|
||||
tesseract::Tesseract* tess);
|
||||
|
||||
// Helper method to convert an orientation index to its value in degrees.
|
||||
// The value represents the amount of clockwise rotation in degrees that must be
|
||||
// applied for the text to be upright (readable).
|
||||
TESS_API int OrientationIdToValue(const int& id);
|
||||
|
||||
#endif // TESSERACT_CCMAIN_OSDETECT_H__
|
|
@ -0,0 +1,450 @@
|
|||
/******************************************************************
|
||||
* File: output.cpp (Formerly output.c)
|
||||
* Description: Output pass
|
||||
* Author: Phil Cheatle
|
||||
* Created: Thu Aug 4 10:56:08 BST 1994
|
||||
*
|
||||
* (C) Copyright 1994, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable:4244) // Conversion warnings
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#ifdef __UNIX__
|
||||
#include <assert.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#endif
|
||||
#include "helpers.h"
|
||||
#include "tessvars.h"
|
||||
#include "control.h"
|
||||
#include "reject.h"
|
||||
#include "docqual.h"
|
||||
#include "output.h"
|
||||
#include "globals.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
#define EPAPER_EXT ".ep"
|
||||
#define PAGE_YSIZE 3508
|
||||
#define CTRL_INSET '\024' //dc4=text inset
|
||||
#define CTRL_FONT '\016' //so=font change
|
||||
#define CTRL_DEFAULT '\017' //si=default font
|
||||
#define CTRL_SHIFT '\022' //dc2=x shift
|
||||
#define CTRL_TAB '\011' //tab
|
||||
#define CTRL_NEWLINE '\012' //newline
|
||||
#define CTRL_HARDLINE '\015' //cr
|
||||
|
||||
/**********************************************************************
|
||||
* pixels_to_pts
|
||||
*
|
||||
* Convert an integer number of pixels to the nearest integer
|
||||
* number of points.
|
||||
**********************************************************************/
|
||||
|
||||
// Convert an integer number of pixels at the given resolution (dpi) to the
// nearest integer number of printer's points (72 points per inch).
inT32 pixels_to_pts(inT32 pixels, inT32 pix_res) {
  // Add 0.5 before truncating to round to the nearest point.
  float points = pixels * 72.0 / pix_res;
  return (inT32)(points + 0.5);
}
|
||||
|
||||
namespace tesseract {
|
||||
// Tess output pass: walk every recognized word on the page and emit it via
// write_results. If |target_word_box| is non-NULL, only words whose bounding
// box center falls inside it are emitted.
void Tesseract::output_pass(  //Tess output pass //send to api
    PAGE_RES_IT &page_res_it,
    const TBOX *target_word_box) {
  BLOCK_RES *block_of_last_word;
  BOOL8 force_eol;               //During output
  BLOCK *nextblock;              //block of next word
  WERD *nextword;                //next word

  page_res_it.restart_page();
  block_of_last_word = NULL;
  while (page_res_it.word() != NULL) {
    check_debug_pt(page_res_it.word(), 120);

    if (target_word_box) {
      // Skip words whose center is outside the requested region.
      TBOX current_word_box = page_res_it.word()->word->bounding_box();
      FCOORD center_pt(
          (current_word_box.right() + current_word_box.left()) / 2,
          (current_word_box.bottom() + current_word_box.top()) / 2);
      if (!target_word_box->contains(center_pt)) {
        page_res_it.forward();
        continue;
      }
    }
    // Track block transitions (used only when writing block separators).
    if (tessedit_write_block_separators &&
        block_of_last_word != page_res_it.block()) {
      block_of_last_word = page_res_it.block();
    }

    // Force an end-of-line at block boundaries and at the last word.
    force_eol = (tessedit_write_block_separators &&
                 (page_res_it.block() != page_res_it.next_block())) ||
                (page_res_it.next_word() == NULL);

    if (page_res_it.next_word() != NULL)
      nextword = page_res_it.next_word()->word;
    else
      nextword = NULL;
    if (page_res_it.next_block() != NULL)
      nextblock = page_res_it.next_block()->block;
    else
      nextblock = NULL;
    //regardless of tilde crunching
    write_results(page_res_it,
                  determine_newline_type(page_res_it.word()->word,
                                         page_res_it.block()->block,
                                         nextword, nextblock), force_eol);
    page_res_it.forward();
  }
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* write_results()
|
||||
*
|
||||
* All recognition and rejection has now been done. Generate the following:
|
||||
* .txt file - giving the final best choices with NO highlighting
|
||||
* .raw file - giving the tesseract top choice output for each word
|
||||
* .map file - showing how the .txt file has been rejected in the .ep file
|
||||
* epchoice list - a list of one element per word, containing the text for the
|
||||
* epaper. Reject strings are inserted.
|
||||
* inset list - a list of bounding boxes of reject insets - indexed by the
|
||||
* reject strings in the epchoice text.
|
||||
*************************************************************************/
|
||||
/*************************************************************************
 * write_results()
 *
 * All recognition and rejection has now been done. Emits one word's output,
 * maintaining the cross-word output state in stats_ (tilde crunching,
 * newline tracking) and applying the rejection-override modes.
 *************************************************************************/
void Tesseract::write_results(PAGE_RES_IT &page_res_it,
                              char newline_type,   // type of newline
                              BOOL8 force_eol) {   // override tilde crunch?
  WERD_RES *word = page_res_it.word();
  const UNICHARSET &uchset = *word->uch_set;
  int i;
  BOOL8 need_reject = FALSE;
  UNICHAR_ID space = uchset.unichar_to_id(" ");

  // CRUNCHED-WORD PATH: the word was suppressed (or empty); decide whether a
  // reject marker must be written, update stats_, and return early.
  if ((word->unlv_crunch_mode != CR_NONE ||
       word->best_choice->length() == 0) &&
      !tessedit_zero_kelvin_rejection && !tessedit_word_for_word) {
    if ((word->unlv_crunch_mode != CR_DELETE) &&
        (!stats_.tilde_crunch_written ||
         ((word->unlv_crunch_mode == CR_KEEP_SPACE) &&
          (word->word->space() > 0) &&
          !word->word->flag(W_FUZZY_NON) &&
          !word->word->flag(W_FUZZY_SP)))) {
      // A real (non-fuzzy) space before this word resets the tilde state.
      if (!word->word->flag(W_BOL) &&
          (word->word->space() > 0) &&
          !word->word->flag(W_FUZZY_NON) &&
          !word->word->flag(W_FUZZY_SP)) {
        stats_.last_char_was_tilde = false;
      }
      need_reject = TRUE;
    }
    if ((need_reject && !stats_.last_char_was_tilde) ||
        (force_eol && stats_.write_results_empty_block)) {
      /* Write a reject char - mark as rejected unless zero_rejection mode */
      stats_.last_char_was_tilde = TRUE;
      stats_.tilde_crunch_written = true;
      stats_.last_char_was_newline = false;
      stats_.write_results_empty_block = false;
    }

    if ((word->word->flag(W_EOL) && !stats_.last_char_was_newline) || force_eol) {
      stats_.tilde_crunch_written = false;
      stats_.last_char_was_newline = true;
      stats_.last_char_was_tilde = false;
    }

    if (force_eol)
      stats_.write_results_empty_block = true;
    return;
  }

  /* NORMAL PROCESSING of non tilde crunched words */

  stats_.tilde_crunch_written = false;
  if (newline_type)
    stats_.last_char_was_newline = true;
  else
    stats_.last_char_was_newline = false;
  stats_.write_results_empty_block = force_eol;  // about to write a real word

  if (unlv_tilde_crunching &&
      stats_.last_char_was_tilde &&
      (word->word->space() == 0) &&
      !(word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes) &&
      (word->best_choice->unichar_id(0) == space)) {
    /* Prevent adjacent tilde across words - we know that adjacent tildes within
       words have been removed */
    word->MergeAdjacentBlobs(0);
  }
  if (newline_type ||
      (word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes))
    stats_.last_char_was_tilde = false;
  else {
    if (word->reject_map.length() > 0) {
      // Track whether the last emitted character is a (tilde-able) space.
      if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space)
        stats_.last_char_was_tilde = true;
      else
        stats_.last_char_was_tilde = false;
    } else if (word->word->space() > 0)
      stats_.last_char_was_tilde = false;
    /* else it is unchanged as there are no output chars */
  }

  ASSERT_HOST(word->best_choice->length() == word->reject_map.length());

  set_unlv_suspects(word);
  check_debug_pt(word, 120);
  if (tessedit_rejection_debug) {
    tprintf("Dict word: \"%s\": %d\n",
            word->best_choice->debug_string().string(),
            dict_word(*(word->best_choice)));
  }
  if (!word->word->flag(W_REP_CHAR) || !tessedit_write_rep_codes) {
    if (tessedit_zero_rejection) {
      /* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
      for (i = 0; i < word->best_choice->length(); ++i) {
        if (word->reject_map[i].rejected())
          word->reject_map[i].setrej_minimal_rej_accept();
      }
    }
    if (tessedit_minimal_rejection) {
      /* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
      for (i = 0; i < word->best_choice->length(); ++i) {
        if ((word->best_choice->unichar_id(i) != space) &&
            word->reject_map[i].rejected())
          word->reject_map[i].setrej_minimal_rej_accept();
      }
    }
  }
}
|
||||
} // namespace tesseract
|
||||
|
||||
/**********************************************************************
|
||||
* determine_newline_type
|
||||
*
|
||||
* Find whether we have a wrapping or hard newline.
|
||||
* Return FALSE if not at end of line.
|
||||
**********************************************************************/
|
||||
|
||||
/**********************************************************************
 * determine_newline_type
 *
 * Decide whether the end of `word` is a hard (forced) newline or a
 * soft (wrapping) newline. Returns FALSE when the word is not at the
 * end of a line at all.
 **********************************************************************/
char determine_newline_type(                 //test line ends
                            WERD *word,      //word to do
                            BLOCK *block,    //current block
                            WERD *next_word, //next word
                            BLOCK *next_block//block of next word
                           ) {
  if (!word->flag(W_EOL))
    return FALSE;                  // Not at the end of a line.
  if (next_word == NULL || next_block == NULL || block != next_block)
    return CTRL_NEWLINE;           // Last line of the block/page.
  if (next_word->space() > 0)
    return CTRL_HARDLINE;          // The next word is tabbed/indented.

  TBOX word_box = word->bounding_box();
  TBOX next_box = next_word->bounding_box();
  TBOX block_box = block->bounding_box();
  // Gap from the end of this word to the right edge of the block,
  // less the block's trailing space allowance.
  inT16 end_gap = block_box.right() - word_box.right();
  end_gap -= (inT32)block->space();
  inT16 next_width = next_box.right() - next_box.left();
  // If the next word would have fitted on this line, the break must
  // have been deliberate (hard); otherwise it is just line wrapping.
  return end_gap > next_width ? CTRL_HARDLINE : CTRL_NEWLINE;
}
|
||||
|
||||
/*************************************************************************
|
||||
* get_rep_char()
|
||||
* Return the first accepted character from the repetition string. This is the
|
||||
* character which is repeated - as determined earlier by fix_rep_char()
|
||||
*************************************************************************/
|
||||
namespace tesseract {
|
||||
UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) {  // what char is repeated?
  // Skip the leading run of rejected characters in the reject map.
  int index = 0;
  while (index < word->reject_map.length() &&
         word->reject_map[index].rejected())
    ++index;
  if (index >= word->reject_map.length()) {
    // Nothing was accepted: fall back to the "unrecognised" character.
    return word->uch_set->unichar_to_id(unrecognised_char.string());
  }
  // First accepted character is the one being repeated.
  return word->best_choice->unichar_id(index);
}
|
||||
|
||||
/*************************************************************************
|
||||
* SUSPECT LEVELS
|
||||
*
|
||||
* 0 - don't reject ANYTHING
|
||||
* 1,2 - partial rejection
|
||||
* 3 - BEST
|
||||
*
|
||||
* NOTE: to reject JUST tess failures in the .map file set suspect_level 3 and
|
||||
* tessedit_minimal_rejection.
|
||||
*************************************************************************/
|
||||
// Applies the configured suspect_level policy to the word's reject map,
// converting selected rejections into "minimal reject" acceptances.
// Levels: 0 = accept everything; 1,2 = partial un-rejection (below);
// >=3 = leave the reject map untouched (best/default behaviour).
void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
  int len = word_res->reject_map.length();
  const WERD_CHOICE &word = *(word_res->best_choice);
  const UNICHARSET &uchset = *word.unicharset();
  int i;
  float rating_per_ch;

  if (suspect_level == 0) {
    // Level 0: unconditionally accept every rejected character.
    for (i = 0; i < len; i++) {
      if (word_res->reject_map[i].rejected())
        word_res->reject_map[i].setrej_minimal_rej_accept();
    }
    return;
  }

  if (suspect_level >= 3)
    return;  //Use defaults

  /* NOW FOR LEVELS 1 and 2 Find some stuff to unreject*/

  // Dictionary words of reasonable length: trust their alphabetic chars.
  if (safe_dict_word(word_res) &&
      (count_alphas(word) > suspect_short_words)) {
    /* Unreject alphas in dictionary words */
    for (i = 0; i < len; ++i) {
      if (word_res->reject_map[i].rejected() &&
          uchset.get_isalpha(word.unichar_id(i)))
        word_res->reject_map[i].setrej_minimal_rej_accept();
    }
  }

  // Average classifier rating per character gates the remaining rules.
  rating_per_ch = word.rating() / word_res->reject_map.length();

  if (rating_per_ch >= suspect_rating_per_ch)
    return;  // Don't touch bad ratings

  if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
    /* Unreject any Tess Acceptable word - but NOT tess reject chs*/
    for (i = 0; i < len; ++i) {
      if (word_res->reject_map[i].rejected() &&
          (!uchset.eq(word.unichar_id(i), " ")))
        word_res->reject_map[i].setrej_minimal_rej_accept();
    }
  }

  // Document/block/row-level blanket rejections are never treated as
  // genuine suspects at levels 1 and 2.
  for (i = 0; i < len; i++) {
    if (word_res->reject_map[i].rejected()) {
      if (word_res->reject_map[i].flag(R_DOC_REJ))
        word_res->reject_map[i].setrej_minimal_rej_accept();
      if (word_res->reject_map[i].flag(R_BLOCK_REJ))
        word_res->reject_map[i].setrej_minimal_rej_accept();
      if (word_res->reject_map[i].flag(R_ROW_REJ))
        word_res->reject_map[i].setrej_minimal_rej_accept();
    }
  }

  if (suspect_level == 2)
    return;

  // Level 1 only, from here down.
  // 1/I/l-confusion rejections are accepted for short words (or always,
  // when suspect_constrain_1Il is off).
  if (!suspect_constrain_1Il ||
      (word_res->reject_map.length() <= suspect_short_words)) {
    for (i = 0; i < len; i++) {
      if (word_res->reject_map[i].rejected()) {
        if ((word_res->reject_map[i].flag(R_1IL_CONFLICT) ||
             word_res->reject_map[i].flag(R_POSTNN_1IL)))
          word_res->reject_map[i].setrej_minimal_rej_accept();

        if (!suspect_constrain_1Il &&
            word_res->reject_map[i].flag(R_MM_REJECT))
          word_res->reject_map[i].setrej_minimal_rej_accept();
      }
    }
  }

  // Words that look like acceptable text or numbers: accept every
  // non-permanent rejection (plus the 1Il/matrix-match special cases).
  if (acceptable_word_string(*word_res->uch_set,
                             word.unichar_string().string(),
                             word.unichar_lengths().string()) !=
          AC_UNACCEPTABLE ||
      acceptable_number_string(word.unichar_string().string(),
                               word.unichar_lengths().string())) {
    if (word_res->reject_map.length() > suspect_short_words) {
      for (i = 0; i < len; i++) {
        if (word_res->reject_map[i].rejected() &&
            (!word_res->reject_map[i].perm_rejected() ||
             word_res->reject_map[i].flag(R_1IL_CONFLICT) ||
             word_res->reject_map[i].flag(R_POSTNN_1IL) ||
             word_res->reject_map[i].flag(R_MM_REJECT))) {
          word_res->reject_map[i].setrej_minimal_rej_accept();
        }
      }
    }
  }
}
|
||||
|
||||
// Counts the characters of the word that the current character set
// classifies as alphabetic.
inT16 Tesseract::count_alphas(const WERD_CHOICE &word) {
  int alpha_total = 0;
  for (int pos = 0; pos < word.length(); ++pos) {
    if (word.unicharset()->get_isalpha(word.unichar_id(pos)))
      ++alpha_total;
  }
  return alpha_total;
}
|
||||
|
||||
|
||||
// Counts the characters of the word that are either alphabetic or digits
// according to the current character set.
inT16 Tesseract::count_alphanums(const WERD_CHOICE &word) {
  int alnum_total = 0;
  for (int pos = 0; pos < word.length(); ++pos) {
    UNICHAR_ID id = word.unichar_id(pos);
    if (word.unicharset()->get_isalpha(id) ||
        word.unicharset()->get_isdigit(id))
      ++alnum_total;
  }
  return alnum_total;
}
|
||||
|
||||
|
||||
// Returns TRUE if the UTF-8 string `s` looks like a plausible number:
// optionally '(' then one of $ . + -, then digit groups separated by
// single . , or -, optionally terminated by % or ), or by "%)".
// `lengths` holds, per character, the byte length of each UTF-8 char in
// `s`, and is advanced in parallel with `s` inside the loop.
BOOL8 Tesseract::acceptable_number_string(const char *s,
                                          const char *lengths) {
  BOOL8 prev_digit = FALSE;

  // NOTE(review): the two prefix-skips below advance `s` but not
  // `lengths`; this stays aligned only because the skipped chars are
  // 1 byte long — confirm multi-byte inputs cannot reach here.
  if (*lengths == 1 && *s == '(')
    s++;

  if (*lengths == 1 &&
      ((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-')))
    s++;

  for (; *s != '\0'; s += *(lengths++)) {
    if (unicharset.get_isdigit(s, *lengths))
      prev_digit = TRUE;
    // A single separator is allowed only directly after a digit.
    else if (prev_digit &&
             (*lengths == 1 && ((*s == '.') || (*s == ',') || (*s == '-'))))
      prev_digit = FALSE;
    // '%' or ')' as the final character after a digit: acceptable.
    else if (prev_digit && *lengths == 1 &&
             (*(s + *lengths) == '\0') && ((*s == '%') || (*s == ')')))
      return TRUE;
    // "%)" as the final two characters after a digit: acceptable.
    else if (prev_digit &&
             *lengths == 1 && (*s == '%') &&
             (*(lengths + 1) == 1 && *(s + *lengths) == ')') &&
             (*(s + *lengths + *(lengths + 1)) == '\0'))
      return TRUE;
    else
      return FALSE;  // Any other character breaks the number pattern.
  }
  return TRUE;
}
|
||||
} // namespace tesseract
|
|
@ -0,0 +1,33 @@
|
|||
/******************************************************************
|
||||
* File: output.h (Formerly output.h)
|
||||
* Description: Output pass
|
||||
* Author: Phil Cheatle
|
||||
* Created: Thu Aug 4 10:56:08 BST 1994
|
||||
*
|
||||
* (C) Copyright 1994, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef OUTPUT_H
|
||||
#define OUTPUT_H
|
||||
|
||||
#include "params.h"
|
||||
//#include "epapconv.h"
|
||||
#include "pageres.h"
|
||||
|
||||
/** test line ends */
|
||||
char determine_newline_type(WERD *word, ///< word to do
|
||||
BLOCK *block, ///< current block
|
||||
WERD *next_word, ///< next word
|
||||
BLOCK *next_block ///< block of next word
|
||||
);
|
||||
#endif
|
|
@ -0,0 +1,631 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: pageiterator.cpp
|
||||
// Description: Iterator for tesseract page structure that avoids using
|
||||
// tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
// Created: Fri Feb 26 14:32:09 PST 2010
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "pageiterator.h"
|
||||
#include "allheaders.h"
|
||||
#include "helpers.h"
|
||||
#include "pageres.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Constructs an iterator over `page_res`, positioned at the start of the
// page. scale/scaled_yres and the rect_* values record how the working
// image relates to the original image, for coordinate conversion.
PageIterator::PageIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale,
                           int scaled_yres, int rect_left, int rect_top,
                           int rect_width, int rect_height)
    : page_res_(page_res),
      tesseract_(tesseract),
      word_(NULL),
      word_length_(0),
      blob_index_(0),
      cblob_it_(NULL),
      include_upper_dots_(false),
      include_lower_dots_(false),
      scale_(scale),
      scaled_yres_(scaled_yres),
      rect_left_(rect_left),
      rect_top_(rect_top),
      rect_width_(rect_width),
      rect_height_(rect_height) {
  it_ = new PAGE_RES_IT(page_res);  // Owned; freed in the destructor.
  // Explicitly qualified so a derived class's Begin is not called from
  // the constructor.
  PageIterator::Begin();
}
|
||||
|
||||
PageIterator::~PageIterator() {
  // Both pointers are owned by this iterator; cblob_it_ may be NULL,
  // which delete handles safely.
  delete it_;
  delete cblob_it_;
}
|
||||
|
||||
/**
|
||||
* PageIterators may be copied! This makes it possible to iterate over
|
||||
* all the objects at a lower level, while maintaining an iterator to
|
||||
* objects at a higher level.
|
||||
*/
|
||||
// Copy constructor: duplicates the source's position by deep-copying its
// PAGE_RES_IT and re-seeking to the same blob offset via BeginWord.
PageIterator::PageIterator(const PageIterator& src)
    : page_res_(src.page_res_),
      tesseract_(src.tesseract_),
      word_(NULL),
      word_length_(src.word_length_),
      blob_index_(src.blob_index_),
      cblob_it_(NULL),  // Rebuilt by BeginWord; never shared with src.
      include_upper_dots_(src.include_upper_dots_),
      include_lower_dots_(src.include_lower_dots_),
      scale_(src.scale_),
      scaled_yres_(src.scaled_yres_),
      rect_left_(src.rect_left_),
      rect_top_(src.rect_top_),
      rect_width_(src.rect_width_),
      rect_height_(src.rect_height_) {
  it_ = new PAGE_RES_IT(*src.it_);
  BeginWord(src.blob_index_);
}
|
||||
|
||||
// Assignment: copies the source's position and view parameters, deep-
// copying the underlying PAGE_RES_IT and re-seeking with BeginWord.
const PageIterator& PageIterator::operator=(const PageIterator& src) {
  // Self-assignment guard: without it, `delete it_` below would destroy
  // src.it_ and the subsequent copy would read freed memory.
  if (this == &src)
    return *this;
  page_res_ = src.page_res_;
  tesseract_ = src.tesseract_;
  include_upper_dots_ = src.include_upper_dots_;
  include_lower_dots_ = src.include_lower_dots_;
  scale_ = src.scale_;
  scaled_yres_ = src.scaled_yres_;
  rect_left_ = src.rect_left_;
  rect_top_ = src.rect_top_;
  rect_width_ = src.rect_width_;
  rect_height_ = src.rect_height_;
  delete it_;
  it_ = new PAGE_RES_IT(*src.it_);
  BeginWord(src.blob_index_);
  return *this;
}
|
||||
|
||||
bool PageIterator::PositionedAtSameWord(const PAGE_RES_IT* other) const {
|
||||
return (it_ == NULL && it_ == other) ||
|
||||
((other != NULL) && (it_ != NULL) && (*it_ == *other));
|
||||
}
|
||||
|
||||
// ============= Moving around within the page ============.
|
||||
|
||||
/** Resets the iterator to point to the start of the page. */
|
||||
void PageIterator::Begin() {
  // Restart from the first word, including empty/non-text regions so
  // every block is visited, then reset the blob position to 0.
  it_->restart_page_with_empties();
  BeginWord(0);
}
|
||||
|
||||
// Moves the iterator back to the first word of the paragraph that
// contains the current position. No-op at the end of the document.
void PageIterator::RestartParagraph() {
  if (it_->block() == NULL) return; // At end of the document.
  // Walk paragraph starts from the top of the page until the next one
  // would pass the current position; `para` is then our paragraph start.
  PAGE_RES_IT para(page_res_);
  PAGE_RES_IT next_para(para);
  next_para.forward_paragraph();
  while (next_para.cmp(*it_) <= 0) {
    para = next_para;
    next_para.forward_paragraph();
  }
  *it_ = para;
  BeginWord(0);
}
|
||||
|
||||
// Returns true if the current position lies on the first text line of
// its paragraph, by comparing our row with the paragraph-start row.
bool PageIterator::IsWithinFirstTextlineOfParagraph() const {
  PageIterator p_start(*this);
  p_start.RestartParagraph();
  return p_start.it_->row() == it_->row();
}
|
||||
|
||||
// Moves the iterator back to the first word of the current row.
void PageIterator::RestartRow() {
  it_->restart_row();
  BeginWord(0);
}
|
||||
|
||||
/**
|
||||
* Moves to the start of the next object at the given level in the
|
||||
* page hierarchy, and returns false if the end of the page was reached.
|
||||
* NOTE (CHANGED!) that ALL PageIteratorLevel level values will visit each
|
||||
* non-text block at least once.
|
||||
* Think of non text blocks as containing a single para, with at least one
|
||||
* line, with a single imaginary word, containing a single symbol.
|
||||
* The bounding boxes mark out any polygonal nature of the block, and
|
||||
* PTIsTextType(BLockType()) is false for non-text blocks.
|
||||
* Calls to Next with different levels may be freely intermixed.
|
||||
* This function iterates words in right-to-left scripts correctly, if
|
||||
* the appropriate language has been loaded into Tesseract.
|
||||
*/
|
||||
bool PageIterator::Next(PageIteratorLevel level) {
  if (it_->block() == NULL) return false;  // Already at the end!
  // In a non-text block there is no word; the only meaningful step is
  // to the next block.
  if (it_->word() == NULL)
    level = RIL_BLOCK;

  switch (level) {
    case RIL_BLOCK:
      it_->forward_block();
      break;
    case RIL_PARA:
      it_->forward_paragraph();
      break;
    case RIL_TEXTLINE:
      // Skip words until the row changes.
      for (it_->forward_with_empties(); it_->row() == it_->prev_row();
           it_->forward_with_empties());
      break;
    case RIL_WORD:
      it_->forward_with_empties();
      break;
    case RIL_SYMBOL:
      if (cblob_it_ != NULL)
        cblob_it_->forward();
      ++blob_index_;
      if (blob_index_ >= word_length_)
        // Ran off the end of the word: move to the next word.
        it_->forward_with_empties();
      else
        // Still inside the same word: no word-level reset needed.
        return true;
      break;
  }
  BeginWord(0);
  return it_->block() != NULL;
}
|
||||
|
||||
/**
|
||||
* Returns true if the iterator is at the start of an object at the given
|
||||
* level. Possible uses include determining if a call to Next(RIL_WORD)
|
||||
* moved to the start of a RIL_PARA.
|
||||
*/
|
||||
bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const {
  if (it_->block() == NULL) return false;  // Already at the end!
  if (it_->word() == NULL) return true;    // In an image block.
  switch (level) {
    case RIL_BLOCK:
      // First symbol of the first word of a new block.
      return blob_index_ == 0 && it_->block() != it_->prev_block();
    case RIL_PARA:
      // New block, or same block but the row belongs to a new paragraph.
      return blob_index_ == 0 &&
             (it_->block() != it_->prev_block() ||
              it_->row()->row->para() != it_->prev_row()->row->para());
    case RIL_TEXTLINE:
      return blob_index_ == 0 && it_->row() != it_->prev_row();
    case RIL_WORD:
      return blob_index_ == 0;
    case RIL_SYMBOL:
      // Every position is trivially the start of its own symbol.
      return true;
  }
  return false;
}
|
||||
|
||||
/**
|
||||
* Returns whether the iterator is positioned at the last element in a
|
||||
* given level. (e.g. the last word in a line, the last line in a block)
|
||||
*/
|
||||
bool PageIterator::IsAtFinalElement(PageIteratorLevel level,
                                    PageIteratorLevel element) const {
  if (Empty(element)) return true;  // Already at the end!
  // The result is true if we step forward by element and find we are
  // at the the end of the page or at beginning of *all* levels in:
  // [level, element).
  // When there is more than one level difference between element and level,
  // we could for instance move forward one symbol and still be at the first
  // word on a line, so we also have to be at the first symbol in a word.
  PageIterator next(*this);
  next.Next(element);
  if (next.Empty(element)) return true;  // Reached the end of the page.
  // Walk element down towards level, requiring a fresh start at every
  // intermediate granularity.
  while (element > level) {
    element = static_cast<PageIteratorLevel>(element - 1);
    if (!next.IsAtBeginningOf(element))
      return false;
  }
  return true;
}
|
||||
|
||||
/**
|
||||
* Returns whether this iterator is positioned
|
||||
* before other: -1
|
||||
* equal to other: 0
|
||||
* after other: 1
|
||||
*/
|
||||
int PageIterator::Cmp(const PageIterator &other) const {
|
||||
int word_cmp = it_->cmp(*other.it_);
|
||||
if (word_cmp != 0)
|
||||
return word_cmp;
|
||||
if (blob_index_ < other.blob_index_)
|
||||
return -1;
|
||||
if (blob_index_ == other.blob_index_)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
// Coordinate system:
|
||||
// Integer coordinates are at the cracks between the pixels.
|
||||
// The top-left corner of the top-left pixel in the image is at (0,0).
|
||||
// The bottom-right corner of the bottom-right pixel in the image is at
|
||||
// (width, height).
|
||||
// Every bounding box goes from the top-left of the top-left contained
|
||||
// pixel to the bottom-right of the bottom-right contained pixel, so
|
||||
// the bounding box of the single top-left pixel in the image is:
|
||||
// (0,0)->(1,1).
|
||||
// If an image rectangle has been set in the API, then returned coordinates
|
||||
// relate to the original (full) image, rather than the rectangle.
|
||||
|
||||
/**
|
||||
* Returns the bounding rectangle of the current object at the given level in
|
||||
* the coordinates of the working image that is pix_binary().
|
||||
* See comment on coordinate system above.
|
||||
* Returns false if there is no such object at the current position.
|
||||
*/
|
||||
bool PageIterator::BoundingBoxInternal(PageIteratorLevel level,
                                       int* left, int* top,
                                       int* right, int* bottom) const {
  if (Empty(level))
    return false;
  TBOX box;
  PARA *para = NULL;
  switch (level) {
    case RIL_BLOCK:
      box = it_->block()->block->restricted_bounding_box(include_upper_dots_,
                                                         include_lower_dots_);
      break;
    case RIL_PARA:
      // Remember the paragraph, then start from the current row's box
      // and union in the rest of the paragraph's rows below.
      para = it_->row()->row->para();
      // explicit fall-through.
    case RIL_TEXTLINE:
      box = it_->row()->row->restricted_bounding_box(include_upper_dots_,
                                                     include_lower_dots_);
      break;
    case RIL_WORD:
      box = it_->word()->word->restricted_bounding_box(include_upper_dots_,
                                                       include_lower_dots_);
      break;
    case RIL_SYMBOL:
      // Post-recognition words use box_word; pre-recognition use cblobs.
      if (cblob_it_ == NULL)
        box = it_->word()->box_word->BlobBox(blob_index_);
      else
        box = cblob_it_->data()->bounding_box();
  }
  if (level == RIL_PARA) {
    // Union the boxes of every row in the same block and paragraph.
    PageIterator other = *this;
    other.Begin();
    do {
      if (other.it_->block() &&
          other.it_->block()->block == it_->block()->block &&
          other.it_->row() && other.it_->row()->row &&
          other.it_->row()->row->para() == para) {
        box = box.bounding_union(other.it_->row()->row->bounding_box());
      }
    } while (other.Next(RIL_TEXTLINE));
  }
  // box_word symbol boxes are already in image rotation; everything else
  // needs rotating back to image coordinates.
  if (level != RIL_SYMBOL || cblob_it_ != NULL)
    box.rotate(it_->block()->block->re_rotation());
  // Now we have a box in tesseract coordinates relative to the image rectangle,
  // we have to convert the coords to a top-down system.
  const int pix_height = pixGetHeight(tesseract_->pix_binary());
  const int pix_width = pixGetWidth(tesseract_->pix_binary());
  *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width);
  *top = ClipToRange(pix_height - box.top(), 0, pix_height);
  *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width);
  *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height);
  return true;
}
|
||||
|
||||
/**
|
||||
* Returns the bounding rectangle of the current object at the given level in
|
||||
* coordinates of the original image.
|
||||
* See comment on coordinate system above.
|
||||
* Returns false if there is no such object at the current position.
|
||||
*/
|
||||
bool PageIterator::BoundingBox(PageIteratorLevel level,
                               int* left, int* top,
                               int* right, int* bottom) const {
  // Delegates to the padded overload with zero padding.
  return BoundingBox(level, 0, left, top, right, bottom);
}
|
||||
|
||||
// Returns the bounding box of the current object at the given level,
// expanded by `padding`, converted to original-image coordinates and
// clipped to the working rectangle. Returns false if there is no object.
bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding,
                               int* left, int* top,
                               int* right, int* bottom) const {
  if (!BoundingBoxInternal(level, left, top, right, bottom))
    return false;
  // Convert to the coordinate system of the original image.
  *left = ClipToRange(*left / scale_ + rect_left_ - padding,
                      rect_left_, rect_left_ + rect_width_);
  *top = ClipToRange(*top / scale_ + rect_top_ - padding,
                     rect_top_, rect_top_ + rect_height_);
  // Right/bottom round up so the scaled box still covers the object.
  *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding,
                       *left, rect_left_ + rect_width_);
  *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding,
                        *top, rect_top_ + rect_height_);
  return true;
}
|
||||
|
||||
/** Return that there is no such object at a given level. */
|
||||
bool PageIterator::Empty(PageIteratorLevel level) const {
  if (it_->block() == NULL) return true;  // Already at the end!
  if (it_->word() == NULL && level != RIL_BLOCK) return true;  // image block
  if (level == RIL_SYMBOL && blob_index_ >= word_length_)
    return true;  // Zero length word, or already at the end of it.
  return false;
}
|
||||
|
||||
/** Returns the type of the current block. See apitypes.h for PolyBlockType. */
|
||||
PolyBlockType PageIterator::BlockType() const {
  if (it_->block() == NULL || it_->block()->block == NULL)
    return PT_UNKNOWN;  // Already at the end!
  if (it_->block()->block->poly_block() == NULL)
    return PT_FLOWING_TEXT;  // No layout analysis used - assume text.
  return it_->block()->block->poly_block()->isA();
}
|
||||
|
||||
/** Returns the polygon outline of the current block. The returned Pta must
|
||||
* be ptaDestroy-ed after use. */
|
||||
// Returns the polygon outline of the current block as a Leptonica Pta in
// top-down original-image coordinates, or NULL when unavailable.
// Caller owns the result and must ptaDestroy it.
Pta* PageIterator::BlockPolygon() const {
  if (it_->block() == NULL || it_->block()->block == NULL)
    return NULL;  // Already at the end!
  if (it_->block()->block->poly_block() == NULL)
    return NULL;  // No layout analysis used - no polygon.
  ICOORDELT_IT it(it_->block()->block->poly_block()->points());
  Pta* pta = ptaCreate(it.length());
  int num_pts = 0;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++num_pts) {
    ICOORD* pt = it.data();
    // Convert to top-down coords within the input image.
    float x = static_cast<float>(pt->x()) / scale_ + rect_left_;
    float y = rect_top_ + rect_height_ - static_cast<float>(pt->y()) / scale_;
    ptaAddPt(pta, x, y);
  }
  return pta;
}
|
||||
|
||||
/**
|
||||
* Returns a binary image of the current object at the given level.
|
||||
* The position and size match the return from BoundingBoxInternal, and so this
|
||||
* could be upscaled with respect to the original input image.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
* The following methods are used to generate the images:
|
||||
* RIL_BLOCK: mask the page image with the block polygon.
|
||||
* RIL_TEXTLINE: Clip the rectangle of the line box from the page image.
|
||||
* TODO(rays) fix this to generate and use a line polygon.
|
||||
* RIL_WORD: Clip the rectangle of the word box from the page image.
|
||||
* RIL_SYMBOL: Render the symbol outline to an image for cblobs (prior
|
||||
* to recognition) or the bounding box otherwise.
|
||||
* A reconstruction of the original image (using xor to check for double
|
||||
* representation) should be reasonably accurate,
|
||||
* apart from removed noise, at the block level. Below the block level, the
|
||||
* reconstruction will be missing images and line separators.
|
||||
* At the symbol level, kerned characters will be invade the bounding box
|
||||
* if rendered after recognition, making an xor reconstruction inaccurate, but
|
||||
* an or construction better. Before recognition, symbol-level reconstruction
|
||||
* should be good, even with xor, since the images come from the connected
|
||||
* components.
|
||||
*/
|
||||
// Returns a binary image of the current object at the given level, or
// NULL if there is no object. Caller owns the Pix (pixDestroy).
Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const {
  int left, top, right, bottom;
  if (!BoundingBoxInternal(level, &left, &top, &right, &bottom))
    return NULL;
  // Pre-recognition symbols render directly from the cblob outline.
  if (level == RIL_SYMBOL && cblob_it_ != NULL &&
      cblob_it_->data()->area() != 0)
    return cblob_it_->data()->render();
  Box* box = boxCreate(left, top, right - left, bottom - top);
  Pix* pix = pixClipRectangle(tesseract_->pix_binary(), box, NULL);
  boxDestroy(&box);
  if (level == RIL_BLOCK || level == RIL_PARA) {
    // Clip to the block polygon as well.
    TBOX mask_box;
    Pix* mask = it_->block()->block->render_mask(&mask_box);
    int mask_x = left - mask_box.left();
    int mask_y = top - (tesseract_->ImageHeight() - mask_box.top());
    // AND the mask and pix, putting the result in pix.
    pixRasterop(pix, MAX(0, -mask_x), MAX(0, -mask_y), pixGetWidth(pix),
                pixGetHeight(pix), PIX_SRC & PIX_DST, mask, MAX(0, mask_x),
                MAX(0, mask_y));
    pixDestroy(&mask);
  }
  return pix;
}
|
||||
|
||||
/**
|
||||
* Returns an image of the current object at the given level in greyscale
|
||||
* if available in the input. To guarantee a binary image use BinaryImage.
|
||||
* NOTE that in order to give the best possible image, the bounds are
|
||||
* expanded slightly over the binary connected component, by the supplied
|
||||
* padding, so the top-left position of the returned image is returned
|
||||
* in (left,top). These will most likely not match the coordinates
|
||||
* returned by BoundingBox.
|
||||
* If you do not supply an original image, you will get a binary one.
|
||||
* Use pixDestroy to delete the image after use.
|
||||
*/
|
||||
// Returns an image of the current object clipped from `original_img`
// with `padding` pixels of margin, writing the image-space top-left into
// (*left, *top). Falls back to the binary image when no original is
// supplied. Caller owns the returned Pix (pixDestroy).
Pix* PageIterator::GetImage(PageIteratorLevel level, int padding,
                            Pix* original_img,
                            int* left, int* top) const {
  int right, bottom;
  if (!BoundingBox(level, left, top, &right, &bottom))
    return NULL;
  if (original_img == NULL)
    return GetBinaryImage(level);

  // Expand the box.
  *left = MAX(*left - padding, 0);
  *top = MAX(*top - padding, 0);
  right = MIN(right + padding, rect_width_);
  bottom = MIN(bottom + padding, rect_height_);
  Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
  Pix* grey_pix = pixClipRectangle(original_img, box, NULL);
  boxDestroy(&box);
  if (level == RIL_BLOCK || level == RIL_PARA) {
    // Clip to the block polygon as well.
    TBOX mask_box;
    Pix* mask = it_->block()->block->render_mask(&mask_box);
    // Copy the mask registered correctly into an image the size of grey_pix.
    int mask_x = *left - mask_box.left();
    int mask_y = *top - (pixGetHeight(original_img) - mask_box.top());
    int width = pixGetWidth(grey_pix);
    int height = pixGetHeight(grey_pix);
    Pix* resized_mask = pixCreate(width, height, 1);
    pixRasterop(resized_mask, MAX(0, -mask_x), MAX(0, -mask_y), width, height,
                PIX_SRC, mask, MAX(0, mask_x), MAX(0, mask_y));
    pixDestroy(&mask);
    // Grow the mask by the padding, then white out everything outside it.
    pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1,
                   2 * padding + 1);
    pixInvert(resized_mask, resized_mask);
    pixSetMasked(grey_pix, resized_mask, MAX_UINT32);
    pixDestroy(&resized_mask);
  }
  return grey_pix;
}
|
||||
|
||||
/**
|
||||
* Returns the baseline of the current object at the given level.
|
||||
* The baseline is the line that passes through (x1, y1) and (x2, y2).
|
||||
* WARNING: with vertical text, baselines may be vertical!
|
||||
*/
|
||||
bool PageIterator::Baseline(PageIteratorLevel level,
                            int* x1, int* y1, int* x2, int* y2) const {
  if (it_->word() == NULL) return false;  // Already at the end!
  ROW* row = it_->row()->row;
  WERD* word = it_->word()->word;
  // Word/symbol levels use the word's box; line levels use the row's.
  TBOX box = (level == RIL_WORD || level == RIL_SYMBOL)
                 ? word->bounding_box()
                 : row->bounding_box();
  int left = box.left();
  // Sample the row's baseline spline at both box edges (+0.5 to round).
  ICOORD startpt(left, static_cast<inT16>(row->base_line(left) + 0.5));
  int right = box.right();
  ICOORD endpt(right, static_cast<inT16>(row->base_line(right) + 0.5));
  // Rotate to image coordinates and convert to global image coords.
  startpt.rotate(it_->block()->block->re_rotation());
  endpt.rotate(it_->block()->block->re_rotation());
  *x1 = startpt.x() / scale_ + rect_left_;
  *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_;
  *x2 = endpt.x() / scale_ + rect_left_;
  *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_;
  return true;
}
|
||||
|
||||
// Fills in all four properties of the current block: page orientation,
// writing direction, textline order, and the residual deskew angle.
// BUG FIX: a stray early "return;" after the orientation computation made
// the remaining three outputs unreachable, leaving them unset for callers.
void PageIterator::Orientation(tesseract::Orientation *orientation,
                               tesseract::WritingDirection *writing_direction,
                               tesseract::TextlineOrder *textline_order,
                               float *deskew_angle) const {
  BLOCK* block = it_->block()->block;

  // Orientation: rotate the reading-order "up" vector through the
  // block's rotations to find which way the page content faces.
  FCOORD up_in_image(0.0, 1.0);
  up_in_image.unrotate(block->classify_rotation());
  up_in_image.rotate(block->re_rotation());

  if (up_in_image.x() == 0.0F) {
    if (up_in_image.y() > 0.0F) {
      *orientation = ORIENTATION_PAGE_UP;
    } else {
      *orientation = ORIENTATION_PAGE_DOWN;
    }
  } else if (up_in_image.x() > 0.0F) {
    *orientation = ORIENTATION_PAGE_RIGHT;
  } else {
    *orientation = ORIENTATION_PAGE_LEFT;
  }

  // Writing direction
  bool is_vertical_text = (block->classify_rotation().x() == 0.0);
  bool right_to_left = block->right_to_left();
  *writing_direction =
      is_vertical_text
          ? WRITING_DIRECTION_TOP_TO_BOTTOM
          : (right_to_left
                 ? WRITING_DIRECTION_RIGHT_TO_LEFT
                 : WRITING_DIRECTION_LEFT_TO_RIGHT);

  // Textline Order
  bool is_mongolian = false;  // TODO(eger): fix me
  *textline_order = is_vertical_text
                        ? (is_mongolian
                               ? TEXTLINE_ORDER_LEFT_TO_RIGHT
                               : TEXTLINE_ORDER_RIGHT_TO_LEFT)
                        : TEXTLINE_ORDER_TOP_TO_BOTTOM;

  // Deskew angle
  FCOORD skew = block->skew();  // true horizontal for textlines
  *deskew_angle = -skew.angle();
}
|
||||
|
||||
// Reports paragraph attributes for the current row's paragraph. *just is
// always set (JUSTIFICATION_UNKNOWN when no paragraph model exists); the
// other outputs are written only when a model is available.
void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just,
                                 bool *is_list_item,
                                 bool *is_crown,
                                 int *first_line_indent) const {
  *just = tesseract::JUSTIFICATION_UNKNOWN;
  if (!it_->row() || !it_->row()->row || !it_->row()->row->para() ||
      !it_->row()->row->para()->model)
    return;

  PARA *para = it_->row()->row->para();
  *is_list_item = para->is_list_item;
  *is_crown = para->is_very_first_or_continuation;
  // Indent of the first line relative to the paragraph body.
  *first_line_indent = para->model->first_indent() -
                       para->model->body_indent();
  *just = para->model->justification();
}
|
||||
|
||||
/**
 * Sets up the internal data for iterating the blobs of a new word, then
 * moves the iterator to the given offset.
 *
 * Two iteration modes exist:
 *  - Recognized word (best_choice != NULL): symbols come from box_word, so
 *    word_ is NULL and the owned cblob_it_ is deleted.
 *  - Unrecognized word: symbols are the raw C_BLOBs of word_res->word, so a
 *    C_BLOB_IT is (lazily) allocated and attached to the blob list.
 * In both cases blob_index_ ends at offset; cblob_it_ is advanced in step
 * only when it exists (i.e. in the unrecognized mode).
 */
void PageIterator::BeginWord(int offset) {
  WERD_RES* word_res = it_->word();
  if (word_res == NULL) {
    // This is a non-text block, so there is no word.
    word_length_ = 0;
    blob_index_ = 0;
    word_ = NULL;
    return;
  }
  if (word_res->best_choice != NULL) {
    // Recognition has been done, so we are using the box_word, which
    // is already baseline denormalized.
    word_length_ = word_res->best_choice->length();
    if (word_res->box_word != NULL) {
      if (word_res->box_word->length() != word_length_) {
        // Diagnostic before the hard assert below: lengths must agree.
        tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ",
                word_length_, word_res->best_choice->unichar_string().string(),
                word_res->box_word->length());
        word_res->box_word->bounding_box().print();
      }
      ASSERT_HOST(word_res->box_word->length() == word_length_);
    }
    word_ = NULL;
    // We will be iterating the box_word.
    delete cblob_it_;
    cblob_it_ = NULL;
  }
  else {
    // No recognition yet, so a "symbol" is a cblob.
    word_ = word_res->word;
    ASSERT_HOST(word_->cblob_list() != NULL);
    word_length_ = word_->cblob_list()->length();
    // Lazily create the blob iterator; it is owned by this PageIterator.
    if (cblob_it_ == NULL) cblob_it_ = new C_BLOB_IT;
    cblob_it_->set_to_list(word_->cblob_list());
  }
  // Advance to the requested symbol offset, keeping cblob_it_ (when present)
  // in lock-step with blob_index_.
  for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) {
    if (cblob_it_ != NULL)
      cblob_it_->forward();
  }
}
|
||||
|
||||
// If positioned at a word, hands the given BlamerBundle (ownership included)
// to that word's WERD_RES and reports success; otherwise reports failure.
bool PageIterator::SetWordBlamerBundle(BlamerBundle *blamer_bundle) {
  if (it_->word() == NULL) return false;
  it_->word()->blamer_bundle = blamer_bundle;
  return true;
}
|
||||
|
||||
} // namespace tesseract.
|
|
@ -0,0 +1,364 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: pageiterator.h
|
||||
// Description: Iterator for tesseract page structure that avoids using
|
||||
// tesseract internal data structures.
|
||||
// Author: Ray Smith
|
||||
// Created: Fri Feb 26 11:01:06 PST 2010
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H__
|
||||
#define TESSERACT_CCMAIN_PAGEITERATOR_H__
|
||||
|
||||
#include "publictypes.h"
|
||||
#include "platform.h"
|
||||
|
||||
struct BlamerBundle;
|
||||
class C_BLOB_IT;
|
||||
class PAGE_RES;
|
||||
class PAGE_RES_IT;
|
||||
class WERD;
|
||||
struct Pix;
|
||||
struct Pta;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Tesseract;
|
||||
|
||||
/**
|
||||
* Class to iterate over tesseract page structure, providing access to all
|
||||
* levels of the page hierarchy, without including any tesseract headers or
|
||||
* having to handle any tesseract structures.
|
||||
* WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
* therefore can only be used while the TessBaseAPI class still exists and
|
||||
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
* See apitypes.h for the definition of PageIteratorLevel.
|
||||
* See also ResultIterator, derived from PageIterator, which adds in the
|
||||
* ability to access OCR output with text-specific methods.
|
||||
*/
|
||||
|
||||
class TESS_API PageIterator {
 public:
  /**
   * page_res and tesseract come directly from the BaseAPI.
   * The rectangle parameters are copied indirectly from the Thresholder,
   * via the BaseAPI. They represent the coordinates of some rectangle in an
   * original image (in top-left-origin coordinates) and therefore the top-left
   * needs to be added to any output boxes in order to specify coordinates
   * in the original image. See TessBaseAPI::SetRectangle.
   * The scale and scaled_yres are in case the Thresholder scaled the image
   * rectangle prior to thresholding. Any coordinates in tesseract's image
   * must be divided by scale before adding (rect_left, rect_top).
   * The scaled_yres indicates the effective resolution of the binary image
   * that tesseract has been given by the Thresholder.
   * After the constructor, Begin has already been called.
   */
  PageIterator(PAGE_RES* page_res, Tesseract* tesseract,
               int scale, int scaled_yres,
               int rect_left, int rect_top,
               int rect_width, int rect_height);
  virtual ~PageIterator();

  /**
   * Page/ResultIterators may be copied! This makes it possible to iterate over
   * all the objects at a lower level, while maintaining an iterator to
   * objects at a higher level. These constructors DO NOT CALL Begin, so
   * iterations will continue from the location of src.
   */
  PageIterator(const PageIterator& src);
  const PageIterator& operator=(const PageIterator& src);

  /** Are we positioned at the same location as other? */
  bool PositionedAtSameWord(const PAGE_RES_IT* other) const;

  // ============= Moving around within the page ============.

  /**
   * Moves the iterator to point to the start of the page to begin an
   * iteration.
   */
  virtual void Begin();

  /**
   * Moves the iterator to the beginning of the paragraph.
   * This class implements this functionality by moving it to the zero indexed
   * blob of the first (leftmost) word on the first row of the paragraph.
   */
  virtual void RestartParagraph();

  /**
   * Return whether this iterator points anywhere in the first textline of a
   * paragraph.
   */
  bool IsWithinFirstTextlineOfParagraph() const;

  /**
   * Moves the iterator to the beginning of the text line.
   * This class implements this functionality by moving it to the zero indexed
   * blob of the first (leftmost) word of the row.
   */
  virtual void RestartRow();

  /**
   * Moves to the start of the next object at the given level in the
   * page hierarchy, and returns false if the end of the page was reached.
   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
   * PageIteratorLevel level values will visit each non-text block once.
   * Think of non text blocks as containing a single para, with a single line,
   * with a single imaginary word.
   * Calls to Next with different levels may be freely intermixed.
   * This function iterates words in right-to-left scripts correctly, if
   * the appropriate language has been loaded into Tesseract.
   */
  virtual bool Next(PageIteratorLevel level);

  /**
   * Returns true if the iterator is at the start of an object at the given
   * level.
   *
   * For instance, suppose an iterator it is pointed to the first symbol of the
   * first word of the third line of the second paragraph of the first block in
   * a page, then:
   *   it.IsAtBeginningOf(RIL_BLOCK) = false
   *   it.IsAtBeginningOf(RIL_PARA) = false
   *   it.IsAtBeginningOf(RIL_TEXTLINE) = true
   *   it.IsAtBeginningOf(RIL_WORD) = true
   *   it.IsAtBeginningOf(RIL_SYMBOL) = true
   */
  virtual bool IsAtBeginningOf(PageIteratorLevel level) const;

  /**
   * Returns whether the iterator is positioned at the last element in a
   * given level. (e.g. the last word in a line, the last line in a block)
   *
   *     Here's some two-paragraph example
   *   text.  It starts off innocuously
   *   enough but quickly turns bizarre.
   *     The author inserts a cornucopia
   *   of words to guard against confused
   *   references.
   *
   * Now take an iterator it pointed to the start of "bizarre."
   *   it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
   *   it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
   *   it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
   */
  virtual bool IsAtFinalElement(PageIteratorLevel level,
                                PageIteratorLevel element) const;

  /**
   * Returns whether this iterator is positioned
   *   before other:   -1
   *   equal to other:  0
   *   after other:     1
   */
  int Cmp(const PageIterator &other) const;

  // ============= Accessing data ==============.
  // Coordinate system:
  // Integer coordinates are at the cracks between the pixels.
  // The top-left corner of the top-left pixel in the image is at (0,0).
  // The bottom-right corner of the bottom-right pixel in the image is at
  // (width, height).
  // Every bounding box goes from the top-left of the top-left contained
  // pixel to the bottom-right of the bottom-right contained pixel, so
  // the bounding box of the single top-left pixel in the image is:
  // (0,0)->(1,1).
  // If an image rectangle has been set in the API, then returned coordinates
  // relate to the original (full) image, rather than the rectangle.

  /**
   * Controls what to include in a bounding box. Bounding boxes of all levels
   * between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
   * Between layout analysis and recognition, it isn't known where all
   * diacritics belong, so this control is used to include or exclude some
   * diacritics that are above or below the main body of the word. In most cases
   * where the placement is obvious, and after recognition, it doesn't make as
   * much difference, as the diacritics will already be included in the word.
   */
  void SetBoundingBoxComponents(bool include_upper_dots,
                                bool include_lower_dots) {
    include_upper_dots_ = include_upper_dots;
    include_lower_dots_ = include_lower_dots;
  }

  /**
   * Returns the bounding rectangle of the current object at the given level.
   * See comment on coordinate system above.
   * Returns false if there is no such object at the current position.
   * The returned bounding box is guaranteed to match the size and position
   * of the image returned by GetBinaryImage, but may clip foreground pixels
   * from a grey image. The padding argument to GetImage can be used to expand
   * the image to include more foreground pixels. See GetImage below.
   */
  bool BoundingBox(PageIteratorLevel level,
                   int* left, int* top, int* right, int* bottom) const;
  bool BoundingBox(PageIteratorLevel level, const int padding,
                   int* left, int* top, int* right, int* bottom) const;

  /**
   * Returns the bounding rectangle of the object in a coordinate system of the
   * working image rectangle having its origin at (rect_left_, rect_top_) with
   * respect to the original image and is scaled by a factor scale_.
   */
  bool BoundingBoxInternal(PageIteratorLevel level,
                           int* left, int* top, int* right, int* bottom) const;

  /** Returns whether there is no object of a given level. */
  bool Empty(PageIteratorLevel level) const;

  /**
   * Returns the type of the current block. See apitypes.h for
   * PolyBlockType.
   */
  PolyBlockType BlockType() const;

  /**
   * Returns the polygon outline of the current block. The returned Pta must
   * be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
   * of the polygon, and the last edge is the line segment between the last
   * point and the first point. NULL will be returned if the iterator is
   * at the end of the document or layout analysis was not used.
   */
  Pta* BlockPolygon() const;

  /**
   * Returns a binary image of the current object at the given level.
   * The position and size match the return from BoundingBoxInternal, and so
   * this could be upscaled with respect to the original input image.
   * Use pixDestroy to delete the image after use.
   */
  Pix* GetBinaryImage(PageIteratorLevel level) const;

  /**
   * Returns an image of the current object at the given level in greyscale
   * if available in the input. To guarantee a binary image use BinaryImage.
   * NOTE that in order to give the best possible image, the bounds are
   * expanded slightly over the binary connected component, by the supplied
   * padding, so the top-left position of the returned image is returned
   * in (left,top). These will most likely not match the coordinates
   * returned by BoundingBox.
   * If you do not supply an original image, you will get a binary one.
   * Use pixDestroy to delete the image after use.
   */
  Pix* GetImage(PageIteratorLevel level, int padding, Pix* original_img,
                int* left, int* top) const;

  /**
   * Returns the baseline of the current object at the given level.
   * The baseline is the line that passes through (x1, y1) and (x2, y2).
   * WARNING: with vertical text, baselines may be vertical!
   * Returns false if there is no baseline at the current position.
   */
  bool Baseline(PageIteratorLevel level,
                int* x1, int* y1, int* x2, int* y2) const;

  /**
   * Returns orientation for the block the iterator points to.
   *   orientation, writing_direction, textline_order: see publictypes.h
   *   deskew_angle: after rotating the block so the text orientation is
   *                 upright, how many radians does one have to rotate the
   *                 block anti-clockwise for it to be level?
   *                   -Pi/4 <= deskew_angle <= Pi/4
   */
  void Orientation(tesseract::Orientation *orientation,
                   tesseract::WritingDirection *writing_direction,
                   tesseract::TextlineOrder *textline_order,
                   float *deskew_angle) const;

  /**
   * Returns information about the current paragraph, if available.
   *
   *   justification -
   *     LEFT if ragged right, or fully justified and script is left-to-right.
   *     RIGHT if ragged left, or fully justified and script is right-to-left.
   *     unknown if it looks like source code or we have very few lines.
   *   is_list_item -
   *     true if we believe this is a member of an ordered or unordered list.
   *   is_crown -
   *     true if the first line of the paragraph is aligned with the other
   *     lines of the paragraph even though subsequent paragraphs have first
   *     line indents.  This typically indicates that this is the continuation
   *     of a previous paragraph or that it is the very first paragraph in
   *     the chapter.
   *   first_line_indent -
   *     For LEFT aligned paragraphs, the first text line of paragraphs of
   *     this kind are indented this many pixels from the left edge of the
   *     rest of the paragraph.
   *     for RIGHT aligned paragraphs, the first text line of paragraphs of
   *     this kind are indented this many pixels from the right edge of the
   *     rest of the paragraph.
   *     NOTE 1: This value may be negative.
   *     NOTE 2: if *is_crown == true, the first line of this paragraph is
   *             actually flush, and first_line_indent is set to the "common"
   *             first_line_indent for subsequent paragraphs in this block
   *             of text.
   */
  void ParagraphInfo(tesseract::ParagraphJustification *justification,
                     bool *is_list_item,
                     bool *is_crown,
                     int *first_line_indent) const;

  // If the current WERD_RES (it_->word()) is not NULL, sets the BlamerBundle
  // of the current word to the given pointer (takes ownership of the pointer)
  // and returns true.
  // Can only be used when iterating on the word level.
  bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);

 protected:
  /**
   * Sets up the internal data for iterating the blobs of a new word, then
   * moves the iterator to the given offset.
   */
  TESS_LOCAL void BeginWord(int offset);

  /** Pointer to the page_res owned by the API. */
  PAGE_RES* page_res_;
  /** Pointer to the Tesseract object owned by the API. */
  Tesseract* tesseract_;
  /**
   * The iterator to the page_res_. Owned by this ResultIterator.
   * A pointer just to avoid dragging in Tesseract includes.
   */
  PAGE_RES_IT* it_;
  /**
   * The current input WERD being iterated. If there is an output from OCR,
   * then word_ is NULL. Owned by the API.
   */
  WERD* word_;
  /** The length of the current word_. */
  int word_length_;
  /** The current blob index within the word. */
  int blob_index_;
  /**
   * Iterator to the blobs within the word. If NULL, then we are iterating
   * OCR results in the box_word.
   * Owned by this ResultIterator.
   */
  C_BLOB_IT* cblob_it_;
  /** Control over what to include in bounding boxes. */
  bool include_upper_dots_;
  bool include_lower_dots_;
  /** Parameters saved from the Thresholder. Needed to rebuild coordinates. */
  int scale_;
  int scaled_yres_;
  int rect_left_;
  int rect_top_;
  int rect_width_;
  int rect_height_;
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H__
|
|
@ -0,0 +1,434 @@
|
|||
/**********************************************************************
|
||||
* File: pagesegmain.cpp
|
||||
* Description: Top-level page segmenter for Tesseract.
|
||||
* Author: Ray Smith
|
||||
* Created: Thu Sep 25 17:12:01 PDT 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef unlink
|
||||
#include <io.h>
|
||||
#endif
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif // _WIN32
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable:4244) // Conversion warnings
|
||||
#endif
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "allheaders.h"
|
||||
#include "blobbox.h"
|
||||
#include "blread.h"
|
||||
#include "colfind.h"
|
||||
#include "equationdetect.h"
|
||||
#include "imagefind.h"
|
||||
#include "linefind.h"
|
||||
#include "makerow.h"
|
||||
#include "osdetect.h"
|
||||
#include "tabvector.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tessvars.h"
|
||||
#include "textord.h"
|
||||
#include "tordmain.h"
|
||||
#include "wordseg.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Max erosions to perform in removing an enclosing circle.
|
||||
const int kMaxCircleErosions = 8;
|
||||
|
||||
// Helper to remove an enclosing circle from an image.
|
||||
// If there isn't one, then the image will most likely get badly mangled.
|
||||
// The returned pix must be pixDestroyed after use. NULL may be returned
|
||||
// if the image doesn't meet the trivial conditions that it uses to determine
|
||||
// success.
|
||||
static Pix* RemoveEnclosingCircle(Pix* pixs) {
|
||||
Pix* pixsi = pixInvert(NULL, pixs);
|
||||
Pix* pixc = pixCreateTemplate(pixs);
|
||||
pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET);
|
||||
pixSeedfillBinary(pixc, pixc, pixsi, 4);
|
||||
pixInvert(pixc, pixc);
|
||||
pixDestroy(&pixsi);
|
||||
Pix* pixt = pixAnd(NULL, pixs, pixc);
|
||||
l_int32 max_count;
|
||||
pixCountConnComp(pixt, 8, &max_count);
|
||||
// The count has to go up before we start looking for the minimum.
|
||||
l_int32 min_count = MAX_INT32;
|
||||
Pix* pixout = NULL;
|
||||
for (int i = 1; i < kMaxCircleErosions; i++) {
|
||||
pixDestroy(&pixt);
|
||||
pixErodeBrick(pixc, pixc, 3, 3);
|
||||
pixt = pixAnd(NULL, pixs, pixc);
|
||||
l_int32 count;
|
||||
pixCountConnComp(pixt, 8, &count);
|
||||
if (i == 1 || count > max_count) {
|
||||
max_count = count;
|
||||
min_count = count;
|
||||
}
|
||||
else if (i > 1 && count < min_count) {
|
||||
min_count = count;
|
||||
pixDestroy(&pixout);
|
||||
pixout = pixCopy(NULL, pixt); // Save the best.
|
||||
}
|
||||
else if (count >= min_count) {
|
||||
break; // We have passed by the best.
|
||||
}
|
||||
}
|
||||
pixDestroy(&pixt);
|
||||
pixDestroy(&pixc);
|
||||
return pixout;
|
||||
}
|
||||
|
||||
/**
 * Segment the page according to the current value of tessedit_pageseg_mode.
 * pix_binary_ is used as the source image and should not be NULL.
 * On return the blocks list owns all the constructed page layout.
 * Returns the AutoPageSeg result code (0 when auto segmentation is not run),
 * or -1 on auto-segmentation failure.
 */
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
                           Tesseract* osd_tess, OSResults* osr) {
  ASSERT_HOST(pix_binary_ != NULL);
  int width = pixGetWidth(pix_binary_);
  int height = pixGetHeight(pix_binary_);
  // Get page segmentation mode.
  PageSegMode pageseg_mode = static_cast<PageSegMode>(
      static_cast<int>(tessedit_pageseg_mode));
  // If a UNLV zone file can be found, use that instead of segmentation.
  if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&
      input_file != NULL && input_file->length() > 0) {
    STRING name = *input_file;
    // Strip the extension: the zone file shares the input's base name.
    const char* lastdot = strrchr(name.string(), '.');
    if (lastdot != NULL)
      name[lastdot - name.string()] = '\0';
    read_unlv_file(name, width, height, blocks);
  }
  if (blocks->empty()) {
    // No UNLV file present. Work according to the PageSegMode.
    // First make a single block covering the whole image.
    BLOCK_IT block_it(blocks);
    BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
    block->set_right_to_left(right_to_left());
    block_it.add_to_end(block);
  }
  else {
    // UNLV file present. Use PSM_SINGLE_BLOCK.
    pageseg_mode = PSM_SINGLE_BLOCK;
  }
  // The diacritic_blobs holds noise blobs that may be diacritics. They
  // are separated out on areas of the image that seem noisy and short-circuit
  // the layout process, going straight from the initial partition creation
  // right through to after word segmentation, where they are added to the
  // rej_cblobs list of the most appropriate word. From there classification
  // will determine whether they are used.
  BLOBNBOX_LIST diacritic_blobs;
  int auto_page_seg_ret_val = 0;
  TO_BLOCK_LIST to_blocks;
  if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||
      PSM_SPARSE(pageseg_mode)) {
    // Full auto layout analysis (and optionally OSD).
    auto_page_seg_ret_val = AutoPageSeg(
        pageseg_mode, blocks, &to_blocks,
        enable_noise_removal ? &diacritic_blobs : NULL, osd_tess, osr);
    if (pageseg_mode == PSM_OSD_ONLY)
      return auto_page_seg_ret_val;
    // To create blobs from the image region bounds uncomment this line:
    //  to_blocks.clear();  // Uncomment to go back to the old mode.
  }
  else {
    // No layout analysis: assume no skew.
    deskew_ = FCOORD(1.0f, 0.0f);
    reskew_ = FCOORD(1.0f, 0.0f);
    if (pageseg_mode == PSM_CIRCLE_WORD) {
      // Strip an enclosing circle (e.g. a stamped/circled word) if found.
      Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
      if (pixcleaned != NULL) {
        pixDestroy(&pix_binary_);
        pix_binary_ = pixcleaned;
      }
    }
  }

  if (auto_page_seg_ret_val < 0) {
    return -1;
  }

  if (blocks->empty()) {
    if (textord_debug_tabfind)
      tprintf("Empty page\n");
    return 0;  // AutoPageSeg found an empty page.
  }
  bool splitting =
      pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
  bool cjk_mode = textord_use_cjk_fp_model;

  // Hand the blocks over to textline/word finding.
  textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
                       pix_thresholds_, pix_grey_, splitting || cjk_mode,
                       &diacritic_blobs, blocks, &to_blocks);
  return auto_page_seg_ret_val;
}
|
||||
|
||||
// Helper writes a grey image to a file for use by scrollviewer.
|
||||
// Normally for speed we don't display the image in the layout debug windows.
|
||||
// If textord_debug_images is true, we draw the image as a background to some
|
||||
// of the debug windows. printable determines whether these
|
||||
// images are optimized for printing instead of screen display.
|
||||
static void WriteDebugBackgroundImage(bool printable, Pix* pix_binary) {
|
||||
Pix* grey_pix = pixCreate(pixGetWidth(pix_binary),
|
||||
pixGetHeight(pix_binary), 8);
|
||||
// Printable images are light grey on white, but for screen display
|
||||
// they are black on dark grey so the other colors show up well.
|
||||
if (printable) {
|
||||
pixSetAll(grey_pix);
|
||||
pixSetMasked(grey_pix, pix_binary, 192);
|
||||
}
|
||||
else {
|
||||
pixSetAllArbitrary(grey_pix, 64);
|
||||
pixSetMasked(grey_pix, pix_binary, 0);
|
||||
}
|
||||
AlignedBlob::IncrementDebugPix();
|
||||
pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG);
|
||||
pixDestroy(&grey_pix);
|
||||
}
|
||||
|
||||
/**
 * Auto page segmentation. Divide the page image into blocks of uniform
 * text linespacing and images.
 *
 * Resolution (in ppi) is derived from the input image.
 *
 * The output goes in the blocks list with corresponding TO_BLOCKs in the
 * to_blocks list.
 *
 * If !PSM_COL_FIND_ENABLED(pageseg_mode), then no attempt is made to divide
 * the image into columns, but multiple blocks are still made if the text is
 * of non-uniform linespacing.
 *
 * If diacritic_blobs is non-null, then diacritics/noise blobs, that would
 * confuse layout anaylsis by causing textline overlap, are placed there,
 * with the expectation that they will be reassigned to words later and
 * noise/diacriticness determined via classification.
 *
 * If osd (orientation and script detection) is true then that is performed
 * as well. If only_osd is true, then only orientation and script detection is
 * performed. If osd is desired, (osd or only_osd) then osr_tess must be
 * another Tesseract that was initialized especially for osd, and the results
 * will be output into osr (orientation and script result).
 */
int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
                           TO_BLOCK_LIST* to_blocks,
                           BLOBNBOX_LIST* diacritic_blobs, Tesseract* osd_tess,
                           OSResults* osr) {
  if (textord_debug_images) {
    WriteDebugBackgroundImage(textord_debug_printable, pix_binary_);
  }
  Pix* photomask_pix = NULL;
  Pix* musicmask_pix = NULL;
  // The blocks made by the ColumnFinder. Moved to blocks before return.
  BLOCK_LIST found_blocks;
  TO_BLOCK_LIST temp_blocks;

  ColumnFinder* finder = SetupPageSegAndDetectOrientation(
      pageseg_mode, blocks, osd_tess, osr, &temp_blocks, &photomask_pix,
      &musicmask_pix);
  int result = 0;
  if (finder != NULL) {
    TO_BLOCK_IT to_block_it(&temp_blocks);
    TO_BLOCK* to_block = to_block_it.data();
    if (musicmask_pix != NULL) {
      // TODO(rays) pass the musicmask_pix into FindBlocks and mark music
      // blocks separately. For now combine with photomask_pix.
      pixOr(photomask_pix, photomask_pix, musicmask_pix);
    }
    if (equ_detect_) {
      finder->SetEquationDetect(equ_detect_);
    }
    /* Locally disabled block finding. The original comment here is mojibake;
       it is presumably GBK for "disable skew-angle recognition" -- TODO
       confirm with the vendor. NOTE(review): with this call commented out,
       result stays 0, deskew_/reskew_ are not updated here, and found_blocks
       remains empty -- so the add_list_after below leaves *blocks empty,
       which the caller (SegmentPage) then treats as an empty page.
    result = finder->FindBlocks(
        pageseg_mode, scaled_color_, scaled_factor_, to_block, photomask_pix,
        pix_thresholds_, pix_grey_, &found_blocks, diacritic_blobs, to_blocks);
    if (result >= 0)
      finder->GetDeskewVectors(&deskew_, &reskew_);
    */
    delete finder;
  }
  pixDestroy(&photomask_pix);
  pixDestroy(&musicmask_pix);
  if (result < 0) return result;

  blocks->clear();
  BLOCK_IT block_it(blocks);
  // Move the found blocks to the input/output blocks.
  block_it.add_list_after(&found_blocks);

  if (textord_debug_images) {
    // The debug image is no longer needed so delete it.
    unlink(AlignedBlob::textord_debug_pix().string());
  }
  return result;
}
|
||||
|
||||
// Helper adds all the scripts from sid_set converted to ids from osd_set to
|
||||
// allowed_ids.
|
||||
static void AddAllScriptsConverted(const UNICHARSET& sid_set,
|
||||
const UNICHARSET& osd_set,
|
||||
GenericVector<int>* allowed_ids) {
|
||||
for (int i = 0; i < sid_set.get_script_table_size(); ++i) {
|
||||
if (i != sid_set.null_sid()) {
|
||||
const char* script = sid_set.get_script_from_script_id(i);
|
||||
allowed_ids->push_back(osd_set.get_script_id_from_name(script));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Sets up auto page segmentation, determines the orientation, and corrects it.
 * Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to
 * facilitate testing.
 * photo_mask_pix is a pointer to a NULL pointer that will be filled on return
 * with the leptonica photo mask, which must be pixDestroyed by the caller.
 * to_blocks is an empty list that will be filled with (usually a single)
 * block that is used during layout analysis. This ugly API is required
 * because of the possibility of a unlv zone file.
 * TODO(rays) clean this up.
 * See AutoPageSeg for other arguments.
 * The returned ColumnFinder must be deleted after use.
 * Returns NULL when pageseg_mode is PSM_OSD_ONLY, or when the page has too
 * little text (to_block->line_size < 2).
 */
ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
    PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
    OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
    Pix** music_mask_pix) {
  int vertical_x = 0;
  int vertical_y = 1;
  TabVector_LIST v_lines;
  TabVector_LIST h_lines;
  // NOTE(review): bleft is never read in this function - confirm it can be
  // removed.
  ICOORD bleft(0, 0);

  ASSERT_HOST(pix_binary_ != NULL);
  if (tessedit_dump_pageseg_images) {
    pixWrite("tessinput.png", pix_binary_, IFF_PNG);
  }
  // Leptonica is used to find the rule/separator lines in the input.
  LineFinder::FindAndRemoveLines(source_resolution_,
                                 textord_tabfind_show_vlines, pix_binary_,
                                 &vertical_x, &vertical_y, music_mask_pix,
                                 &v_lines, &h_lines);
  if (tessedit_dump_pageseg_images)
    pixWrite("tessnolines.png", pix_binary_, IFF_PNG);
  // Leptonica is used to find a mask of the photo regions in the input.
  *photo_mask_pix = ImageFind::FindImages(pix_binary_);
  if (tessedit_dump_pageseg_images)
    pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);
  // Vertical separator lines are only useful for column finding.
  if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();

  // The rest of the algorithm uses the usual connected components.
  textord_.find_components(pix_binary_, blocks, to_blocks);

  TO_BLOCK_IT to_block_it(to_blocks);
  // There must be exactly one input block.
  // TODO(rays) handle new textline finding with a UNLV zone file.
  ASSERT_HOST(to_blocks->singleton());
  TO_BLOCK* to_block = to_block_it.data();
  TBOX blkbox = to_block->block->bounding_box();
  ColumnFinder* finder = NULL;

  // Too few lines means there is not enough evidence to do orientation or
  // column analysis; finder stays NULL in that case.
  if (to_block->line_size >= 2) {
    finder = new ColumnFinder(static_cast<int>(to_block->line_size),
                              blkbox.botleft(), blkbox.topright(),
                              source_resolution_, textord_use_cjk_fp_model,
                              textord_tabfind_aligned_gap_fraction,
                              &v_lines, &h_lines, vertical_x, vertical_y);

    finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);

    if (equ_detect_) {
      equ_detect_->LabelSpecialText(to_block);
    }

    BLOBNBOX_CLIST osd_blobs;
    // osd_orientation is the number of 90 degree rotations to make the
    // characters upright. (See osdetect.h for precise definition.)
    // We want the text lines horizontal, (vertical text indicates vertical
    // textlines) which may conflict (eg vertically written CJK).
    int osd_orientation = 0;
    bool vertical_text = textord_tabfind_force_vertical_text ||
                         pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
    if (!vertical_text && textord_tabfind_vertical_text &&
        PSM_ORIENTATION_ENABLED(pageseg_mode)) {
      vertical_text =
          finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio,
                                          to_block, &osd_blobs);
    }
    if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != NULL && osr != NULL) {
      GenericVector<int> osd_scripts;
      if (osd_tess != this) {
        // We are running osd as part of layout analysis, so constrain the
        // scripts to those allowed by *this.
        AddAllScriptsConverted(unicharset, osd_tess->unicharset, &osd_scripts);
        for (int s = 0; s < sub_langs_.size(); ++s) {
          AddAllScriptsConverted(sub_langs_[s]->unicharset,
                                 osd_tess->unicharset, &osd_scripts);
        }
      }
      os_detect_blobs(&osd_scripts, &osd_blobs, osr, osd_tess);
      if (pageseg_mode == PSM_OSD_ONLY) {
        delete finder;
        return NULL;
      }
      osd_orientation = osr->best_result.orientation_id;
      double osd_score = osr->orientations[osd_orientation];
      // osd_margin ends up as the smallest score gap between the best
      // orientation and any of the other three; a small gap means the
      // orientation decision is low-confidence.
      double osd_margin = min_orientation_margin * 2;
      for (int i = 0; i < 4; ++i) {
        if (i != osd_orientation &&
            osd_score - osr->orientations[i] < osd_margin) {
          osd_margin = osd_score - osr->orientations[i];
        }
      }
      int best_script_id = osr->best_result.script_id;
      const char* best_script_str =
          osd_tess->unicharset.get_script_from_script_id(best_script_id);
      bool cjk = best_script_id == osd_tess->unicharset.han_sid() ||
                 best_script_id == osd_tess->unicharset.hiragana_sid() ||
                 best_script_id == osd_tess->unicharset.katakana_sid() ||
                 strcmp("Japanese", best_script_str) == 0 ||
                 strcmp("Korean", best_script_str) == 0 ||
                 strcmp("Hangul", best_script_str) == 0;
      if (cjk) {
        finder->set_cjk_script(true);
      }
      if (osd_margin < min_orientation_margin) {
        // The margin is weak.
        if (!cjk && !vertical_text && osd_orientation == 2) {
          // upside down latin text is improbable with such a weak margin.
          tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: "
                  "Don't rotate.\n", osd_margin);
          osd_orientation = 0;
        }
        else {
          tprintf(
              "OSD: Weak margin (%.2f) for %d blob text block, "
              "but using orientation anyway: %d\n",
              osd_margin, osd_blobs.length(), osd_orientation);
        }
      }
    }
    // osd_blobs does not own its elements; shallow_clear avoids deleting them.
    osd_blobs.shallow_clear();
    finder->CorrectOrientation(to_block, vertical_text, osd_orientation);
  }

  return finder;
}
|
||||
|
||||
} // namespace tesseract.
|
|
@ -0,0 +1,43 @@
|
|||
/**********************************************************************
|
||||
* File: pagewalk.cpp (Formerly walkers.c)
|
||||
* Description: Block list processors
|
||||
* Author: Phil Cheatle
|
||||
* Created: Thu Oct 10 16:25:24 BST 1991
|
||||
*
|
||||
* (C) Copyright 1991, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "pageres.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
namespace tesseract {
|
||||
/**
|
||||
* @name process_selected_words()
|
||||
*
|
||||
* Walk the current block list applying the specified word processor function
|
||||
* to each word that overlaps the selection_box.
|
||||
*/
|
||||
void Tesseract::process_selected_words(
|
||||
PAGE_RES* page_res, // blocks to check
|
||||
TBOX & selection_box,
|
||||
BOOL8(tesseract::Tesseract::*word_processor)(PAGE_RES_IT* pr_it)) {
|
||||
for (PAGE_RES_IT page_res_it(page_res); page_res_it.word() != NULL;
|
||||
page_res_it.forward()) {
|
||||
WERD* word = page_res_it.word()->word;
|
||||
if (word->bounding_box().overlap(selection_box)) {
|
||||
if (!(this->*word_processor)(&page_res_it))
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace tesseract
|
|
@ -0,0 +1,69 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: par_control.cpp
|
||||
// Description: Control code for parallel implementation.
|
||||
// Author: Ray Smith
|
||||
// Created: Mon Nov 04 13:23:15 PST 2013
|
||||
//
|
||||
// (C) Copyright 2013, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "tesseractclass.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
struct BlobData {
|
||||
BlobData() : blob(NULL), choices(NULL) {}
|
||||
BlobData(int index, Tesseract* tess, const WERD_RES& word)
|
||||
: blob(word.chopped_word->blobs[index]),
|
||||
tesseract(tess),
|
||||
choices(&(*word.ratings)(index, index)) {}
|
||||
|
||||
TBLOB* blob;
|
||||
Tesseract* tesseract;
|
||||
BLOB_CHOICE_LIST** choices;
|
||||
};
|
||||
|
||||
void Tesseract::PrerecAllWordsPar(const GenericVector<WordData>& words) {
|
||||
// Prepare all the blobs.
|
||||
GenericVector<BlobData> blobs;
|
||||
for (int w = 0; w < words.size(); ++w) {
|
||||
if (words[w].word->ratings != NULL &&
|
||||
words[w].word->ratings->get(0, 0) == NULL) {
|
||||
for (int s = 0; s < words[w].lang_words.size(); ++s) {
|
||||
Tesseract* sub = s < sub_langs_.size() ? sub_langs_[s] : this;
|
||||
const WERD_RES& word = *words[w].lang_words[s];
|
||||
for (int b = 0; b < word.chopped_word->NumBlobs(); ++b) {
|
||||
blobs.push_back(BlobData(b, sub, word));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Pre-classify all the blobs.
|
||||
if (tessedit_parallelize > 1) {
|
||||
for (int b = 0; b < blobs.size(); ++b) {
|
||||
*blobs[b].choices =
|
||||
blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// TODO(AMD) parallelize this.
|
||||
for (int b = 0; b < blobs.size(); ++b) {
|
||||
*blobs[b].choices =
|
||||
blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,108 @@
|
|||
/**********************************************************************
|
||||
* File: paragraphs.h
|
||||
* Description: Paragraph Detection data structures.
|
||||
* Author: David Eger
|
||||
* Created: 25 February 2011
|
||||
*
|
||||
* (C) Copyright 2011, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PARAGRAPHS_H_
|
||||
#define TESSERACT_CCMAIN_PARAGRAPHS_H_
|
||||
|
||||
#include "rect.h"
|
||||
#include "ocrpara.h"
|
||||
#include "genericvector.h"
|
||||
#include "strngs.h"
|
||||
|
||||
|
||||
class WERD;
|
||||
class UNICHARSET;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class MutableIterator;
|
||||
|
||||
// This structure captures all information needed about a text line for the
// purposes of paragraph detection. It is meant to be exceedingly light-weight
// so that we can easily test paragraph detection independent of the rest of
// Tesseract.
class RowInfo {
 public:
  // Constant data derived from Tesseract output.
  STRING text;       // the full UTF-8 text of the line.
  bool ltr;          // whether the majority of the text is left-to-right
                     // TODO(eger) make this more fine-grained.

  bool has_leaders;  // does the line contain leader dots (.....)?
  bool has_drop_cap; // does the line have a drop cap?
  int pix_ldistance; // distance to the left pblock boundary in pixels
  int pix_rdistance; // distance to the right pblock boundary in pixels
  float pix_xheight; // guessed xheight for the line
  int average_interword_space; // average space between words in pixels.

  int num_words;     // number of words on this line.
  TBOX lword_box;    // bounding box of the leftmost word,
                     // in normalized (horiz text rows) space
  TBOX rword_box;    // bounding box of the rightmost word,
                     // in normalized (horiz text rows) space

  STRING lword_text;  // the UTF-8 text of the leftmost werd
  STRING rword_text;  // the UTF-8 text of the rightmost werd

  // The text of a paragraph typically starts with the start of an idea and
  // ends with the end of an idea. Here we define paragraph as something that
  // may have a first line indent and a body indent which may be different.
  // Typical words that start an idea are:
  //   1. Words in western scripts that start with
  //      a capital letter, for example "The"
  //   2. Bulleted or numbered list items, for
  //      example "2."
  // Typical words which end an idea are words ending in punctuation marks. In
  // this vocabulary, each list item is represented as a paragraph.
  bool lword_indicates_list_item;
  bool lword_likely_starts_idea;
  bool lword_likely_ends_idea;

  bool rword_indicates_list_item;
  bool rword_likely_starts_idea;
  bool rword_likely_ends_idea;
};
|
||||
|
||||
// Main entry point for Paragraph Detection Algorithm.
|
||||
//
|
||||
// Given a set of equally spaced textlines (described by row_infos),
|
||||
// Split them into paragraphs. See http://goto/paragraphstalk
|
||||
//
|
||||
// Output:
|
||||
// row_owners - one pointer for each row, to the paragraph it belongs to.
|
||||
// paragraphs - this is the actual list of PARA objects.
|
||||
// models - the list of paragraph models referenced by the PARA objects.
|
||||
// caller is responsible for deleting the models.
|
||||
void DetectParagraphs(int debug_level,
|
||||
GenericVector<RowInfo> *row_infos,
|
||||
GenericVector<PARA *> *row_owners,
|
||||
PARA_LIST *paragraphs,
|
||||
GenericVector<ParagraphModel *> *models);
|
||||
|
||||
// Given a MutableIterator to the start of a block, run DetectParagraphs on
|
||||
// that block and commit the results to the underlying ROW and BLOCK structs,
|
||||
// saving the ParagraphModels in models. Caller owns the models.
|
||||
// We use unicharset during the function to answer questions such as "is the
|
||||
// first letter of this word upper case?"
|
||||
void DetectParagraphs(int debug_level,
|
||||
bool after_text_recognition,
|
||||
const MutableIterator *block_start,
|
||||
GenericVector<ParagraphModel *> *models);
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // TESSERACT_CCMAIN_PARAGRAPHS_H_
|
|
@ -0,0 +1,312 @@
|
|||
/**********************************************************************
|
||||
* File: paragraphs.h
|
||||
* Description: Paragraph Detection internal data structures.
|
||||
* Author: David Eger
|
||||
* Created: 11 March 2011
|
||||
*
|
||||
* (C) Copyright 2011, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_PARAGRAPHS_INTERNAL_H_
|
||||
#define TESSERACT_CCMAIN_PARAGRAPHS_INTERNAL_H_
|
||||
|
||||
#include "paragraphs.h"
|
||||
#ifdef _MSC_VER
|
||||
#include <string>
|
||||
#else
|
||||
#include "strings.h"
|
||||
#endif
|
||||
|
||||
// NO CODE OUTSIDE OF paragraphs.cpp AND TESTS SHOULD NEED TO ACCESS
|
||||
// DATA STRUCTURES OR FUNCTIONS IN THIS FILE.
|
||||
|
||||
class WERD_CHOICE;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Return whether the given word is likely to be a list item start word.
|
||||
bool AsciiLikelyListItem(const STRING &word);
|
||||
|
||||
// Return the first Unicode Codepoint from werd[pos].
|
||||
int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos);
|
||||
|
||||
// Set right word attributes given either a unicharset and werd or a utf8
|
||||
// string.
|
||||
void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd,
|
||||
const STRING &utf8,
|
||||
bool *is_list, bool *starts_idea, bool *ends_idea);
|
||||
|
||||
// Set left word attributes given either a unicharset and werd or a utf8 string.
|
||||
void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd,
|
||||
const STRING &utf8,
|
||||
bool *is_list, bool *starts_idea, bool *ends_idea);
|
||||
|
||||
// Classification of a text line's role in paragraph structure. The char
// values are used as printable debug codes.
enum LineType {
  LT_START = 'S',     // First line of a paragraph.
  LT_BODY = 'C',      // Continuation line of a paragraph.
  LT_UNKNOWN = 'U',   // No clues.
  LT_MULTIPLE = 'M',  // Matches for both LT_START and LT_BODY.
};
|
||||
|
||||
// The first paragraph in a page of body text is often un-indented.
|
||||
// This is a typographic convention which is common to indicate either that:
|
||||
// (1) The paragraph is the continuation of a previous paragraph, or
|
||||
// (2) The paragraph is the first paragraph in a chapter.
|
||||
//
|
||||
// I refer to such paragraphs as "crown"s, and the output of the paragraph
|
||||
// detection algorithm attempts to give them the same paragraph model as
|
||||
// the rest of the body text.
|
||||
//
|
||||
// Nonetheless, while building hypotheses, it is useful to mark the lines
|
||||
// of crown paragraphs temporarily as crowns, either aligned left or right.
|
||||
extern const ParagraphModel *kCrownLeft;
|
||||
extern const ParagraphModel *kCrownRight;
|
||||
|
||||
inline bool StrongModel(const ParagraphModel *model) {
|
||||
return model != NULL && model != kCrownLeft && model != kCrownRight;
|
||||
}
|
||||
|
||||
// A single hypothesis about a text line: its type (start/body/etc.) and,
// optionally, the paragraph model under which that type applies.
struct LineHypothesis {
  LineHypothesis() : ty(LT_UNKNOWN), model(NULL) {}
  LineHypothesis(LineType line_type, const ParagraphModel *m)
      : ty(line_type), model(m) {}
  LineHypothesis(const LineHypothesis &other)
      : ty(other.ty), model(other.model) {}

  // Two hypotheses are equal iff both the line type and the model match.
  bool operator==(const LineHypothesis &other) const {
    return ty == other.ty && model == other.model;
  }

  LineType ty;
  const ParagraphModel *model;  // Not owned. NULL when no model applies.
};
|
||||
|
||||
class ParagraphTheory; // Forward Declaration
|
||||
|
||||
typedef GenericVectorEqEq<const ParagraphModel *> SetOfModels;
|
||||
|
||||
// Row Scratch Registers are data generated by the paragraph detection
// algorithm based on a RowInfo input: the current set of line-type
// hypotheses for a row plus its margin/indent box model.
class RowScratchRegisters {
 public:
  // We presume row will outlive us.
  void Init(const RowInfo &row);

  // Summarize the current hypotheses into a single LineType.
  LineType GetLineType() const;

  // Summarize the hypotheses concerning the given model into a LineType.
  LineType GetLineType(const ParagraphModel *model) const;

  // Mark this as a start line type, sans model. This is useful for the
  // initial marking of probable body lines or paragraph start lines.
  void SetStartLine();

  // Mark this as a body line type, sans model. This is useful for the
  // initial marking of probable body lines or paragraph start lines.
  void SetBodyLine();

  // Record that this row fits as a paragraph start line in the given model,
  void AddStartLine(const ParagraphModel *model);
  // Record that this row fits as a paragraph body line in the given model,
  void AddBodyLine(const ParagraphModel *model);

  // Clear all hypotheses about this line.
  void SetUnknown() { hypotheses_.truncate(0); }

  // Append all hypotheses of strong models that match this row as a start.
  void StartHypotheses(SetOfModels *models) const;

  // Append all hypotheses of strong models matching this row.
  void StrongHypotheses(SetOfModels *models) const;

  // Append all hypotheses for this row.
  void NonNullHypotheses(SetOfModels *models) const;

  // Discard any hypotheses whose model is not in the given list.
  void DiscardNonMatchingHypotheses(const SetOfModels &models);

  // If we have only one hypothesis and that is that this line is a paragraph
  // start line of a certain model, return that model. Else return NULL.
  const ParagraphModel *UniqueStartHypothesis() const;

  // If we have only one hypothesis and that is that this line is a paragraph
  // body line of a certain model, return that model. Else return NULL.
  const ParagraphModel *UniqueBodyHypothesis() const;

  // Return the indentation for the side opposite of the aligned side.
  int OffsideIndent(tesseract::ParagraphJustification just) const {
    switch (just) {
      case tesseract::JUSTIFICATION_RIGHT: return lindent_;
      case tesseract::JUSTIFICATION_LEFT: return rindent_;
      default: return lindent_ > rindent_ ? lindent_ : rindent_;
    }
  }

  // Return the indentation for the side the text is aligned to.
  int AlignsideIndent(tesseract::ParagraphJustification just) const {
    switch (just) {
      case tesseract::JUSTIFICATION_RIGHT: return rindent_;
      case tesseract::JUSTIFICATION_LEFT: return lindent_;
      default: return lindent_ > rindent_ ? lindent_ : rindent_;
    }
  }

  // Append header fields to a vector of row headings.
  static void AppendDebugHeaderFields(GenericVector<STRING> *header);

  // Append data for this row to a vector of debug strings.
  void AppendDebugInfo(const ParagraphTheory &theory,
                       GenericVector<STRING> *dbg) const;

  const RowInfo *ri_;  // Underlying text-line info. Not owned (see Init).

  // These four constants form a horizontal box model for the white space
  // on the edges of each line. At each point in the algorithm, the following
  // shall hold:
  //   ri_->pix_ldistance = lmargin_ + lindent_
  //   ri_->pix_rdistance = rindent_ + rmargin_
  int lmargin_;
  int lindent_;
  int rindent_;
  int rmargin_;

 private:
  // Hypotheses of either LT_START or LT_BODY
  GenericVectorEqEq<LineHypothesis> hypotheses_;
};
|
||||
|
||||
// A collection of convenience functions for wrapping the set of
// Paragraph Models we believe correctly model the paragraphs in the image.
class ParagraphTheory {
 public:
  // We presume models will outlive us, and that models will take ownership
  // of any ParagraphModel *'s we add.
  explicit ParagraphTheory(GenericVector<ParagraphModel *> *models)
      : models_(models) {}
  GenericVector<ParagraphModel *> &models() { return *models_; }
  const GenericVector<ParagraphModel *> &models() const { return *models_; }

  // Return an existing model if one that is Comparable() can be found.
  // Else, allocate a new copy of model to save and return a pointer to it.
  const ParagraphModel *AddModel(const ParagraphModel &model);

  // Discard any models we've made that are not in the list of used models.
  void DiscardUnusedModels(const SetOfModels &used_models);

  // Return the set of all non-centered models.
  void NonCenteredModels(SetOfModels *models);

  // If any of the non-centered paragraph models we know about fit
  // rows[start, end), return it. Else NULL.
  const ParagraphModel *Fits(const GenericVector<RowScratchRegisters> *rows,
                             int start, int end) const;

  // Return the index of the given model within models().
  int IndexOf(const ParagraphModel *model) const;

 private:
  GenericVector<ParagraphModel *> *models_;  // All models. Not owned.
  // Models allocated by AddModel, tracked so unused ones can be discarded.
  GenericVectorEqEq<ParagraphModel *> models_we_added_;
};
|
||||
|
||||
bool ValidFirstLine(const GenericVector<RowScratchRegisters> *rows,
|
||||
int row, const ParagraphModel *model);
|
||||
bool ValidBodyLine(const GenericVector<RowScratchRegisters> *rows,
|
||||
int row, const ParagraphModel *model);
|
||||
bool CrownCompatible(const GenericVector<RowScratchRegisters> *rows,
|
||||
int a, int b, const ParagraphModel *model);
|
||||
|
||||
// A class for smearing Paragraph Model hypotheses to surrounding rows.
// The idea here is that StrongEvidenceClassify first marks only exceedingly
// obvious start and body rows and constructs models of them. Thereafter,
// we may have left over unmarked lines (mostly end-of-paragraph lines) which
// were too short to have much confidence about, but which fit the models we've
// constructed perfectly and which we ought to mark. This class is used to
// "smear" our models over the text.
class ParagraphModelSmearer {
 public:
  // Operates on rows[row_start, row_end); rows and theory must outlive us.
  ParagraphModelSmearer(GenericVector<RowScratchRegisters> *rows,
                        int row_start, int row_end,
                        ParagraphTheory *theory);

  // Smear forward paragraph models from existing row markings to subsequent
  // text lines if they fit, and mark any thereafter still unmodeled rows
  // with any model in the theory that fits them.
  void Smear();

 private:
  // Record in open_models_ for rows [start_row, end_row) the list of models
  // currently open at each row.
  // A model is still open in a row if some previous row has said model as a
  // start hypothesis, and all rows since (including this row) would fit as
  // either a body or start line in that model.
  void CalculateOpenModels(int row_start, int row_end);

  // Accessor for the open-model set of a row; the +1 offset exists because
  // open_models_[0] corresponds to the row before row_start_.
  SetOfModels &OpenModels(int row) {
    return open_models_[row - row_start_ + 1];
  }

  ParagraphTheory *theory_;                    // Not owned.
  GenericVector<RowScratchRegisters> *rows_;   // Not owned.
  int row_start_;
  int row_end_;

  // open_models_ corresponds to rows[start_row_ - 1, end_row_]
  //
  // open_models_:  Contains models which there was an active (open) paragraph
  //                as of the previous line and for which the left and right
  //                indents admit the possibility that this text line continues
  //                to fit the same model.
  // TODO(eger): Think about whether we can get rid of "Open" models and just
  //   use the current hypotheses on RowScratchRegisters.
  GenericVector<SetOfModels> open_models_;
};
|
||||
|
||||
// Clear all hypotheses about lines [start, end) and reset the margins to the
|
||||
// percentile (0..100) value of the left and right row edges for this run of
|
||||
// rows.
|
||||
void RecomputeMarginsAndClearHypotheses(
|
||||
GenericVector<RowScratchRegisters> *rows, int start, int end,
|
||||
int percentile);
|
||||
|
||||
// Return the median inter-word space in rows[row_start, row_end).
|
||||
int InterwordSpace(const GenericVector<RowScratchRegisters> &rows,
|
||||
int row_start, int row_end);
|
||||
|
||||
// Return whether the first word on the after line can fit in the space at
|
||||
// the end of the before line (knowing which way the text is aligned and read).
|
||||
bool FirstWordWouldHaveFit(const RowScratchRegisters &before,
|
||||
const RowScratchRegisters &after,
|
||||
tesseract::ParagraphJustification justification);
|
||||
|
||||
// Return whether the first word on the after line can fit in the space at
|
||||
// the end of the before line (not knowing the text alignment).
|
||||
bool FirstWordWouldHaveFit(const RowScratchRegisters &before,
|
||||
const RowScratchRegisters &after);
|
||||
|
||||
// Do rows[start, end) form a single instance of the given paragraph model?
|
||||
bool RowsFitModel(const GenericVector<RowScratchRegisters> *rows,
|
||||
int start, int end, const ParagraphModel *model);
|
||||
|
||||
// Do the text and geometry of two rows support a paragraph break between them?
|
||||
bool LikelyParagraphStart(const RowScratchRegisters &before,
|
||||
const RowScratchRegisters &after,
|
||||
tesseract::ParagraphJustification j);
|
||||
|
||||
// Given a set of row_owners pointing to PARAs or NULL (no paragraph known),
|
||||
// normalize each row_owner to point to an actual PARA, and output the
|
||||
// paragraphs in order onto paragraphs.
|
||||
void CanonicalizeDetectionResults(
|
||||
GenericVector<PARA *> *row_owners,
|
||||
PARA_LIST *paragraphs);
|
||||
|
||||
} // namespace
|
||||
#endif // TESSERACT_CCMAIN_PARAGRAPHS_INTERNAL_H_
|
|
@ -0,0 +1,370 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: paramsd.cpp
|
||||
// Description: Tesseract parameter Editor
|
||||
// Author: Joern Wanke
|
||||
// Created: Wed Jul 18 10:05:01 PDT 2007
|
||||
//
|
||||
// (C) Copyright 2007, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The parameters editor is used to edit all the parameters used within
|
||||
// tesseract from the ui.
|
||||
#ifdef _WIN32
|
||||
#else
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include <map>
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
#include "paramsd.h"
|
||||
|
||||
|
||||
#include "params.h"
|
||||
#include "scrollview.h"
|
||||
#include "svmnode.h"
|
||||
|
||||
|
||||
#define VARDIR "configs/" /*parameters files */
|
||||
#define MAX_ITEMS_IN_SUBMENU 30
|
||||
|
||||
// The following variables should remain static globals, since they
|
||||
// are used by debug editor, which uses a single Tesseract instance.
|
||||
//
|
||||
// Contains the mappings from unique VC ids to their actual pointers.
|
||||
static std::map<int, ParamContent*> vcMap;
|
||||
static int nrParams = 0;
|
||||
static int writeCommands[2];
|
||||
|
||||
ELISTIZE(ParamContent)
|
||||
|
||||
// Constructors for the various ParamTypes.
|
||||
ParamContent::ParamContent(tesseract::StringParam* it) {
|
||||
my_id_ = nrParams;
|
||||
nrParams++;
|
||||
param_type_ = VT_STRING;
|
||||
sIt = it;
|
||||
vcMap[my_id_] = this;
|
||||
}
|
||||
// Constructors for the various ParamTypes.
|
||||
ParamContent::ParamContent(tesseract::IntParam* it) {
|
||||
my_id_ = nrParams;
|
||||
nrParams++;
|
||||
param_type_ = VT_INTEGER;
|
||||
iIt = it;
|
||||
vcMap[my_id_] = this;
|
||||
}
|
||||
// Constructors for the various ParamTypes.
|
||||
ParamContent::ParamContent(tesseract::BoolParam* it) {
|
||||
my_id_ = nrParams;
|
||||
nrParams++;
|
||||
param_type_ = VT_BOOLEAN;
|
||||
bIt = it;
|
||||
vcMap[my_id_] = this;
|
||||
}
|
||||
// Constructors for the various ParamTypes.
|
||||
ParamContent::ParamContent(tesseract::DoubleParam* it) {
|
||||
my_id_ = nrParams;
|
||||
nrParams++;
|
||||
param_type_ = VT_DOUBLE;
|
||||
dIt = it;
|
||||
vcMap[my_id_] = this;
|
||||
}
|
||||
|
||||
// Gets a VC object identified by its ID.
// NOTE(review): std::map::operator[] inserts a NULL entry when id is unknown;
// callers receive NULL in that case - confirm the insertion side effect is
// acceptable.
ParamContent* ParamContent::GetParamContentById(int id) {
  return vcMap[id];
}
|
||||
|
||||
// Copy the first N words from the source string to the target string.
|
||||
// Words are delimited by "_".
|
||||
void ParamsEditor::GetFirstWords(
|
||||
const char *s, // source string
|
||||
int n, // number of words
|
||||
char *t // target string
|
||||
) {
|
||||
int full_length = strlen(s);
|
||||
int reqd_len = 0; // No. of chars requird
|
||||
const char *next_word = s;
|
||||
|
||||
while ((n > 0) && reqd_len < full_length) {
|
||||
reqd_len += strcspn(next_word, "_") + 1;
|
||||
next_word += reqd_len;
|
||||
n--;
|
||||
}
|
||||
strncpy(t, s, reqd_len);
|
||||
t[reqd_len] = '\0'; // ensure null terminal
|
||||
}
|
||||
|
||||
// Getter for the name.
|
||||
const char* ParamContent::GetName() const {
|
||||
if (param_type_ == VT_INTEGER) { return iIt->name_str(); }
|
||||
else if (param_type_ == VT_BOOLEAN) { return bIt->name_str(); }
|
||||
else if (param_type_ == VT_DOUBLE) { return dIt->name_str(); }
|
||||
else if (param_type_ == VT_STRING) { return sIt->name_str(); }
|
||||
else
|
||||
return "ERROR: ParamContent::GetName()";
|
||||
}
|
||||
|
||||
// Getter for the description.
|
||||
const char* ParamContent::GetDescription() const {
|
||||
if (param_type_ == VT_INTEGER) { return iIt->info_str(); }
|
||||
else if (param_type_ == VT_BOOLEAN) { return bIt->info_str(); }
|
||||
else if (param_type_ == VT_DOUBLE) { return dIt->info_str(); }
|
||||
else if (param_type_ == VT_STRING) { return sIt->info_str(); }
|
||||
else return NULL;
|
||||
}
|
||||
|
||||
// Getter for the value.
|
||||
STRING ParamContent::GetValue() const {
|
||||
STRING result;
|
||||
if (param_type_ == VT_INTEGER) {
|
||||
result.add_str_int("", *iIt);
|
||||
}
|
||||
else if (param_type_ == VT_BOOLEAN) {
|
||||
result.add_str_int("", *bIt);
|
||||
}
|
||||
else if (param_type_ == VT_DOUBLE) {
|
||||
result.add_str_double("", *dIt);
|
||||
}
|
||||
else if (param_type_ == VT_STRING) {
|
||||
if (((STRING) * (sIt)).string() != NULL) {
|
||||
result = sIt->string();
|
||||
}
|
||||
else {
|
||||
result = "Null";
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Setter for the value.
|
||||
void ParamContent::SetValue(const char* val) {
|
||||
// TODO (wanke) Test if the values actually are properly converted.
|
||||
// (Quickly visible impacts?)
|
||||
changed_ = TRUE;
|
||||
if (param_type_ == VT_INTEGER) {
|
||||
iIt->set_value(atoi(val));
|
||||
}
|
||||
else if (param_type_ == VT_BOOLEAN) {
|
||||
bIt->set_value(atoi(val));
|
||||
}
|
||||
else if (param_type_ == VT_DOUBLE) {
|
||||
dIt->set_value(strtod(val, NULL));
|
||||
}
|
||||
else if (param_type_ == VT_STRING) {
|
||||
sIt->set_value(val);
|
||||
}
|
||||
}
|
||||
|
||||
// Gets the up to the first 3 prefixes from s (split by _).
|
||||
// For example, tesseract_foo_bar will be split into tesseract,foo and bar.
|
||||
void ParamsEditor::GetPrefixes(const char* s, STRING* level_one,
|
||||
STRING* level_two,
|
||||
STRING* level_three) {
|
||||
char* p = new char[1024];
|
||||
GetFirstWords(s, 1, p);
|
||||
*level_one = p;
|
||||
GetFirstWords(s, 2, p);
|
||||
*level_two = p;
|
||||
GetFirstWords(s, 3, p);
|
||||
*level_three = p;
|
||||
delete[] p;
|
||||
}
|
||||
|
||||
// Compare two VC objects by their name.
|
||||
int ParamContent::Compare(const void* v1, const void* v2) {
|
||||
const ParamContent* one =
|
||||
*reinterpret_cast<const ParamContent* const *>(v1);
|
||||
const ParamContent* two =
|
||||
*reinterpret_cast<const ParamContent* const *>(v2);
|
||||
return strcmp(one->GetName(), two->GetName());
|
||||
}
|
||||
|
||||
// Find all editable parameters used within tesseract and create a
|
||||
// SVMenuNode tree from it.
|
||||
// TODO (wanke): This is actually sort of hackish.
|
||||
SVMenuNode* ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) {
|
||||
SVMenuNode* mr = new SVMenuNode();
|
||||
ParamContent_LIST vclist;
|
||||
ParamContent_IT vc_it(&vclist);
|
||||
// Amount counts the number of entries for a specific char*.
|
||||
// TODO(rays) get rid of the use of std::map.
|
||||
std::map<const char*, int> amount;
|
||||
|
||||
// Add all parameters to a list.
|
||||
int v, i;
|
||||
int num_iterations = (tess->params() == NULL) ? 1 : 2;
|
||||
for (v = 0; v < num_iterations; ++v) {
|
||||
tesseract::ParamsVectors *vec = (v == 0) ? GlobalParams() : tess->params();
|
||||
for (i = 0; i < vec->int_params.size(); ++i) {
|
||||
vc_it.add_after_then_move(new ParamContent(vec->int_params[i]));
|
||||
}
|
||||
for (i = 0; i < vec->bool_params.size(); ++i) {
|
||||
vc_it.add_after_then_move(new ParamContent(vec->bool_params[i]));
|
||||
}
|
||||
for (i = 0; i < vec->string_params.size(); ++i) {
|
||||
vc_it.add_after_then_move(new ParamContent(vec->string_params[i]));
|
||||
}
|
||||
for (i = 0; i < vec->double_params.size(); ++i) {
|
||||
vc_it.add_after_then_move(new ParamContent(vec->double_params[i]));
|
||||
}
|
||||
}
|
||||
|
||||
// Count the # of entries starting with a specific prefix.
|
||||
for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) {
|
||||
ParamContent* vc = vc_it.data();
|
||||
STRING tag;
|
||||
STRING tag2;
|
||||
STRING tag3;
|
||||
|
||||
GetPrefixes(vc->GetName(), &tag, &tag2, &tag3);
|
||||
amount[tag.string()]++;
|
||||
amount[tag2.string()]++;
|
||||
amount[tag3.string()]++;
|
||||
}
|
||||
|
||||
vclist.sort(ParamContent::Compare); // Sort the list alphabetically.
|
||||
|
||||
SVMenuNode* other = mr->AddChild("OTHER");
|
||||
|
||||
// go through the list again and this time create the menu structure.
|
||||
vc_it.move_to_first();
|
||||
for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) {
|
||||
ParamContent* vc = vc_it.data();
|
||||
STRING tag;
|
||||
STRING tag2;
|
||||
STRING tag3;
|
||||
GetPrefixes(vc->GetName(), &tag, &tag2, &tag3);
|
||||
|
||||
if (amount[tag.string()] == 1) {
|
||||
other->AddChild(vc->GetName(), vc->GetId(), vc->GetValue().string(),
|
||||
vc->GetDescription());
|
||||
}
|
||||
else { // More than one would use this submenu -> create submenu.
|
||||
SVMenuNode* sv = mr->AddChild(tag.string());
|
||||
if ((amount[tag.string()] <= MAX_ITEMS_IN_SUBMENU) ||
|
||||
(amount[tag2.string()] <= 1)) {
|
||||
sv->AddChild(vc->GetName(), vc->GetId(),
|
||||
vc->GetValue().string(), vc->GetDescription());
|
||||
}
|
||||
else { // Make subsubmenus.
|
||||
SVMenuNode* sv2 = sv->AddChild(tag2.string());
|
||||
sv2->AddChild(vc->GetName(), vc->GetId(),
|
||||
vc->GetValue().string(), vc->GetDescription());
|
||||
}
|
||||
}
|
||||
}
|
||||
return mr;
|
||||
}
|
||||
|
||||
// Event listener. Waits for SVET_POPUP events and processes them.
|
||||
void ParamsEditor::Notify(const SVEvent* sve) {
|
||||
if (sve->type == SVET_POPUP) { // only catch SVET_POPUP!
|
||||
char* param = sve->parameter;
|
||||
if (sve->command_id == writeCommands[0]) {
|
||||
WriteParams(param, false);
|
||||
}
|
||||
else if (sve->command_id == writeCommands[1]) {
|
||||
WriteParams(param, true);
|
||||
}
|
||||
else {
|
||||
ParamContent* vc = ParamContent::GetParamContentById(
|
||||
sve->command_id);
|
||||
vc->SetValue(param);
|
||||
sv_window_->AddMessage("Setting %s to %s",
|
||||
vc->GetName(), vc->GetValue().string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Integrate the parameters editor as popupmenu into the existing scrollview
|
||||
// window (usually the pg editor). If sv == null, create a new empty
|
||||
// empty window and attach the parameters editor to that window (ugly).
|
||||
ParamsEditor::ParamsEditor(tesseract::Tesseract* tess,
|
||||
ScrollView* sv) {
|
||||
if (sv == NULL) {
|
||||
const char* name = "ParamEditorMAIN";
|
||||
sv = new ScrollView(name, 1, 1, 200, 200, 300, 200);
|
||||
}
|
||||
|
||||
sv_window_ = sv;
|
||||
|
||||
//Only one event handler per window.
|
||||
//sv->AddEventHandler((SVEventHandler*) this);
|
||||
|
||||
SVMenuNode* svMenuRoot = BuildListOfAllLeaves(tess);
|
||||
|
||||
STRING paramfile;
|
||||
paramfile = tess->datadir;
|
||||
paramfile += VARDIR; // parameters dir
|
||||
paramfile += "edited"; // actual name
|
||||
|
||||
SVMenuNode* std_menu = svMenuRoot->AddChild("Build Config File");
|
||||
|
||||
writeCommands[0] = nrParams + 1;
|
||||
std_menu->AddChild("All Parameters", writeCommands[0],
|
||||
paramfile.string(), "Config file name?");
|
||||
|
||||
writeCommands[1] = nrParams + 2;
|
||||
std_menu->AddChild("changed_ Parameters Only", writeCommands[1],
|
||||
paramfile.string(), "Config file name?");
|
||||
|
||||
svMenuRoot->BuildMenu(sv, false);
|
||||
}
|
||||
|
||||
|
||||
// Write all (changed_) parameters to a config file.
|
||||
void ParamsEditor::WriteParams(char *filename,
|
||||
bool changes_only) {
|
||||
FILE *fp; // input file
|
||||
char msg_str[255];
|
||||
// if file exists
|
||||
if ((fp = fopen(filename, "rb")) != NULL) {
|
||||
fclose(fp);
|
||||
sprintf(msg_str, "Overwrite file " "%s" "? (Y/N)", filename);
|
||||
int a = sv_window_->ShowYesNoDialog(msg_str);
|
||||
if (a == 'n') {
|
||||
return;
|
||||
} // don't write
|
||||
}
|
||||
|
||||
|
||||
fp = fopen(filename, "wb"); // can we write to it?
|
||||
if (fp == NULL) {
|
||||
sv_window_->AddMessage(
|
||||
"Can't write to file "
|
||||
"%s"
|
||||
"",
|
||||
filename);
|
||||
return;
|
||||
}
|
||||
|
||||
for (std::map<int, ParamContent*>::iterator iter = vcMap.begin();
|
||||
iter != vcMap.end();
|
||||
++iter) {
|
||||
ParamContent* cur = iter->second;
|
||||
if (!changes_only || cur->HasChanged()) {
|
||||
fprintf(fp, "%-25s %-12s # %s\n",
|
||||
cur->GetName(), cur->GetValue().string(), cur->GetDescription());
|
||||
}
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,126 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: paramsd.cpp
|
||||
// Description: Tesseract parameter editor
|
||||
// Author: Joern Wanke
|
||||
// Created: Wed Jul 18 10:05:01 PDT 2007
|
||||
//
|
||||
// (C) Copyright 2007, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Tesseract parameter editor is used to edit all the parameters used
|
||||
// within tesseract from the ui.
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
#ifndef VARABLED_H
|
||||
#define VARABLED_H
|
||||
|
||||
#include "elst.h"
|
||||
#ifndef NO_CUBE_BUILD
|
||||
#include "scrollview.h"
|
||||
#endif
|
||||
#include "params.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
class SVMenuNode;
|
||||
|
||||
// A list of all possible parameter types used.
|
||||
enum ParamType {
|
||||
VT_INTEGER,
|
||||
VT_BOOLEAN,
|
||||
VT_STRING,
|
||||
VT_DOUBLE
|
||||
};
|
||||
|
||||
// A rather hackish helper structure which can take any kind of parameter input
|
||||
// (defined by ParamType) and do a couple of common operations on them, like
|
||||
// comparisond or getting its value. It is used in the context of the
|
||||
// ParamsEditor as a bridge from the internal tesseract parameters to the
|
||||
// ones displayed by the ScrollView server.
|
||||
class ParamContent : public ELIST_LINK {
|
||||
public:
|
||||
// Compare two VC objects by their name.
|
||||
static int Compare(const void* v1, const void* v2);
|
||||
|
||||
// Gets a VC object identified by its ID.
|
||||
static ParamContent* GetParamContentById(int id);
|
||||
|
||||
// Constructors for the various ParamTypes.
|
||||
ParamContent() {
|
||||
}
|
||||
explicit ParamContent(tesseract::StringParam* it);
|
||||
explicit ParamContent(tesseract::IntParam* it);
|
||||
explicit ParamContent(tesseract::BoolParam* it);
|
||||
explicit ParamContent(tesseract::DoubleParam* it);
|
||||
|
||||
|
||||
// Getters and Setters.
|
||||
void SetValue(const char* val);
|
||||
STRING GetValue() const;
|
||||
const char* GetName() const;
|
||||
const char* GetDescription() const;
|
||||
|
||||
int GetId() { return my_id_; }
|
||||
bool HasChanged() { return changed_; }
|
||||
|
||||
private:
|
||||
// The unique ID of this VC object.
|
||||
int my_id_;
|
||||
// Whether the parameter was changed_ and thus needs to be rewritten.
|
||||
bool changed_;
|
||||
// The actual ParamType of this VC object.
|
||||
ParamType param_type_;
|
||||
|
||||
tesseract::StringParam* sIt;
|
||||
tesseract::IntParam* iIt;
|
||||
tesseract::BoolParam* bIt;
|
||||
tesseract::DoubleParam* dIt;
|
||||
};
|
||||
|
||||
ELISTIZEH(ParamContent)
|
||||
|
||||
// The parameters editor enables the user to edit all the parameters used within
|
||||
// tesseract. It can be invoked on its own, but is supposed to be invoked by
|
||||
// the program editor.
|
||||
class ParamsEditor : public SVEventHandler {
|
||||
public:
|
||||
// Integrate the parameters editor as popupmenu into the existing scrollview
|
||||
// window (usually the pg editor). If sv == null, create a new empty
|
||||
// empty window and attach the parameter editor to that window (ugly).
|
||||
explicit ParamsEditor(tesseract::Tesseract*, ScrollView* sv = NULL);
|
||||
|
||||
// Event listener. Waits for SVET_POPUP events and processes them.
|
||||
void Notify(const SVEvent* sve);
|
||||
|
||||
private:
|
||||
// Gets the up to the first 3 prefixes from s (split by _).
|
||||
// For example, tesseract_foo_bar will be split into tesseract,foo and bar.
|
||||
void GetPrefixes(const char* s, STRING* level_one,
|
||||
STRING* level_two, STRING* level_three);
|
||||
|
||||
// Gets the first n words (split by _) and puts them in t.
|
||||
// For example, tesseract_foo_bar with N=2 will yield tesseract_foo_.
|
||||
void GetFirstWords(const char *s, // source string
|
||||
int n, // number of words
|
||||
char *t); // target string
|
||||
|
||||
// Find all editable parameters used within tesseract and create a
|
||||
// SVMenuNode tree from it.
|
||||
SVMenuNode *BuildListOfAllLeaves(tesseract::Tesseract *tess);
|
||||
|
||||
// Write all (changed_) parameters to a config file.
|
||||
void WriteParams(char* filename, bool changes_only);
|
||||
|
||||
ScrollView* sv_window_;
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,87 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: pgedit.h
|
||||
// Description: Page structure file editor
|
||||
// Author: Joern Wanke
|
||||
// Created: Wed Jul 18 10:05:01 PDT 2007
|
||||
//
|
||||
// (C) Copyright 2007, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef PGEDIT_H
|
||||
#define PGEDIT_H
|
||||
|
||||
#include "ocrblock.h"
|
||||
#include "ocrrow.h"
|
||||
#include "werd.h"
|
||||
#include "rect.h"
|
||||
#include "params.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
class ScrollView;
|
||||
class SVMenuNode;
|
||||
struct SVEvent;
|
||||
|
||||
// A small event handler class to process incoming events to
|
||||
// this window.
|
||||
class PGEventHandler : public SVEventHandler {
|
||||
public:
|
||||
PGEventHandler(tesseract::Tesseract* tess) : tess_(tess) {
|
||||
}
|
||||
void Notify(const SVEvent* sve);
|
||||
private:
|
||||
tesseract::Tesseract* tess_;
|
||||
};
|
||||
|
||||
extern BLOCK_LIST *current_block_list;
|
||||
extern STRING_VAR_H(editor_image_win_name, "EditorImage",
|
||||
"Editor image window name");
|
||||
extern INT_VAR_H(editor_image_xpos, 590, "Editor image X Pos");
|
||||
extern INT_VAR_H(editor_image_ypos, 10, "Editor image Y Pos");
|
||||
extern INT_VAR_H(editor_image_height, 680, "Editor image height");
|
||||
extern INT_VAR_H(editor_image_width, 655, "Editor image width");
|
||||
extern INT_VAR_H(editor_image_word_bb_color, BLUE,
|
||||
"Word bounding box colour");
|
||||
extern INT_VAR_H(editor_image_blob_bb_color, YELLOW,
|
||||
"Blob bounding box colour");
|
||||
extern INT_VAR_H(editor_image_text_color, WHITE, "Correct text colour");
|
||||
extern STRING_VAR_H(editor_dbwin_name, "EditorDBWin",
|
||||
"Editor debug window name");
|
||||
extern INT_VAR_H(editor_dbwin_xpos, 50, "Editor debug window X Pos");
|
||||
extern INT_VAR_H(editor_dbwin_ypos, 500, "Editor debug window Y Pos");
|
||||
extern INT_VAR_H(editor_dbwin_height, 24, "Editor debug window height");
|
||||
extern INT_VAR_H(editor_dbwin_width, 80, "Editor debug window width");
|
||||
extern STRING_VAR_H(editor_word_name, "BlnWords",
|
||||
"BL normalised word window");
|
||||
extern INT_VAR_H(editor_word_xpos, 60, "Word window X Pos");
|
||||
extern INT_VAR_H(editor_word_ypos, 510, "Word window Y Pos");
|
||||
extern INT_VAR_H(editor_word_height, 240, "Word window height");
|
||||
extern INT_VAR_H(editor_word_width, 655, "Word window width");
|
||||
extern double_VAR_H(editor_smd_scale_factor, 1.0, "Scaling for smd image");
|
||||
|
||||
ScrollView* bln_word_window_handle(); //return handle
|
||||
void build_image_window(int width, int height);
|
||||
void display_bln_lines(ScrollView window,
|
||||
ScrollView::Color colour,
|
||||
float scale_factor,
|
||||
float y_offset,
|
||||
float minx,
|
||||
float maxx);
|
||||
//function to call
|
||||
void pgeditor_msg( //message display
|
||||
const char *msg);
|
||||
void pgeditor_show_point( //display coords
|
||||
SVEvent *event);
|
||||
//put bln word in box
|
||||
void show_point(PAGE_RES* page_res, float x, float y);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,233 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: recogtraining.cpp
|
||||
// Description: Functions for ambiguity and parameter training.
|
||||
// Author: Daria Antonova
|
||||
// Created: Mon Aug 13 11:26:43 PDT 2009
|
||||
//
|
||||
// (C) Copyright 2009, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "tesseractclass.h"
|
||||
|
||||
#include "boxread.h"
|
||||
#include "control.h"
|
||||
#include "cutil.h"
|
||||
#include "host.h"
|
||||
#include "ratngs.h"
|
||||
#include "reject.h"
|
||||
#include "stopper.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
const inT16 kMaxBoxEdgeDiff = 2;
|
||||
|
||||
// Sets flags necessary for recognition in the training mode.
|
||||
// Opens and returns the pointer to the output file.
|
||||
FILE *Tesseract::init_recog_training(const STRING &fname) {
|
||||
if (tessedit_ambigs_training) {
|
||||
tessedit_tess_adaption_mode.set_value(0); // turn off adaption
|
||||
tessedit_enable_doc_dict.set_value(0); // turn off document dictionary
|
||||
// Explore all segmentations.
|
||||
getDict().stopper_no_acceptable_choices.set_value(1);
|
||||
}
|
||||
|
||||
STRING output_fname = fname;
|
||||
const char *lastdot = strrchr(output_fname.string(), '.');
|
||||
if (lastdot != NULL) output_fname[lastdot - output_fname.string()] = '\0';
|
||||
output_fname += ".txt";
|
||||
FILE *output_file = open_file(output_fname.string(), "a+");
|
||||
return output_file;
|
||||
}
|
||||
|
||||
// Copies the bounding box from page_res_it->word() to the given TBOX.
|
||||
bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox) {
|
||||
while (page_res_it->block() != NULL && page_res_it->word() == NULL)
|
||||
page_res_it->forward();
|
||||
|
||||
if (page_res_it->word() != NULL) {
|
||||
*tbox = page_res_it->word()->word->bounding_box();
|
||||
|
||||
// If tbox->left() is negative, the training image has vertical text and
|
||||
// all the coordinates of bounding boxes of page_res are rotated by 90
|
||||
// degrees in a counterclockwise direction. We need to rotate the TBOX back
|
||||
// in order to compare with the TBOXes of box files.
|
||||
if (tbox->left() < 0) {
|
||||
tbox->rotate(FCOORD(0.0, -1.0));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// This function takes tif/box pair of files and runs recognition on the image,
|
||||
// while making sure that the word bounds that tesseract identified roughly
|
||||
// match to those specified by the input box file. For each word (ngram in a
|
||||
// single bounding box from the input box file) it outputs the ocred result,
|
||||
// the correct label, rating and certainty.
|
||||
void Tesseract::recog_training_segmented(const STRING &fname,
|
||||
PAGE_RES *page_res,
|
||||
volatile ETEXT_DESC *monitor,
|
||||
FILE *output_file) {
|
||||
STRING box_fname = fname;
|
||||
const char *lastdot = strrchr(box_fname.string(), '.');
|
||||
if (lastdot != NULL) box_fname[lastdot - box_fname.string()] = '\0';
|
||||
box_fname += ".box";
|
||||
// ReadNextBox() will close box_file
|
||||
FILE *box_file = open_file(box_fname.string(), "r");
|
||||
|
||||
PAGE_RES_IT page_res_it;
|
||||
page_res_it.page_res = page_res;
|
||||
page_res_it.restart_page();
|
||||
STRING label;
|
||||
|
||||
// Process all the words on this page.
|
||||
TBOX tbox; // tesseract-identified box
|
||||
TBOX bbox; // box from the box file
|
||||
bool keep_going;
|
||||
int line_number = 0;
|
||||
int examined_words = 0;
|
||||
do {
|
||||
keep_going = read_t(&page_res_it, &tbox);
|
||||
keep_going &= ReadNextBox(applybox_page, &line_number, box_file, &label,
|
||||
&bbox);
|
||||
// Align bottom left points of the TBOXes.
|
||||
while (keep_going &&
|
||||
!NearlyEqual<int>(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) {
|
||||
if (bbox.bottom() < tbox.bottom()) {
|
||||
page_res_it.forward();
|
||||
keep_going = read_t(&page_res_it, &tbox);
|
||||
}
|
||||
else {
|
||||
keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label,
|
||||
&bbox);
|
||||
}
|
||||
}
|
||||
while (keep_going &&
|
||||
!NearlyEqual<int>(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) {
|
||||
if (bbox.left() > tbox.left()) {
|
||||
page_res_it.forward();
|
||||
keep_going = read_t(&page_res_it, &tbox);
|
||||
}
|
||||
else {
|
||||
keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label,
|
||||
&bbox);
|
||||
}
|
||||
}
|
||||
// OCR the word if top right points of the TBOXes are similar.
|
||||
if (keep_going &&
|
||||
NearlyEqual<int>(tbox.right(), bbox.right(), kMaxBoxEdgeDiff) &&
|
||||
NearlyEqual<int>(tbox.top(), bbox.top(), kMaxBoxEdgeDiff)) {
|
||||
ambigs_classify_and_output(label.string(), &page_res_it, output_file);
|
||||
examined_words++;
|
||||
}
|
||||
page_res_it.forward();
|
||||
} while (keep_going);
|
||||
|
||||
// Set up scripts on all of the words that did not get sent to
|
||||
// ambigs_classify_and_output. They all should have, but if all the
|
||||
// werd_res's don't get uch_sets, tesseract will crash when you try
|
||||
// to iterate over them. :-(
|
||||
int total_words = 0;
|
||||
for (page_res_it.restart_page(); page_res_it.block() != NULL;
|
||||
page_res_it.forward()) {
|
||||
if (page_res_it.word()) {
|
||||
if (page_res_it.word()->uch_set == NULL)
|
||||
page_res_it.word()->SetupFake(unicharset);
|
||||
total_words++;
|
||||
}
|
||||
}
|
||||
if (examined_words < 0.85 * total_words) {
|
||||
tprintf("TODO(antonova): clean up recog_training_segmented; "
|
||||
" It examined only a small fraction of the ambigs image.\n");
|
||||
}
|
||||
tprintf("recog_training_segmented: examined %d / %d words.\n",
|
||||
examined_words, total_words);
|
||||
}
|
||||
|
||||
// Helper prints the given set of blob choices.
|
||||
static void PrintPath(int length, const BLOB_CHOICE** blob_choices,
|
||||
const UNICHARSET& unicharset,
|
||||
const char *label, FILE *output_file) {
|
||||
float rating = 0.0f;
|
||||
float certainty = 0.0f;
|
||||
for (int i = 0; i < length; ++i) {
|
||||
const BLOB_CHOICE* blob_choice = blob_choices[i];
|
||||
fprintf(output_file, "%s",
|
||||
unicharset.id_to_unichar(blob_choice->unichar_id()));
|
||||
rating += blob_choice->rating();
|
||||
if (certainty > blob_choice->certainty())
|
||||
certainty = blob_choice->certainty();
|
||||
}
|
||||
fprintf(output_file, "\t%s\t%.4f\t%.4f\n",
|
||||
label, rating, certainty);
|
||||
}
|
||||
|
||||
// Helper recursively prints all paths through the ratings matrix, starting
|
||||
// at column col.
|
||||
static void PrintMatrixPaths(int col, int dim,
|
||||
const MATRIX& ratings,
|
||||
int length, const BLOB_CHOICE** blob_choices,
|
||||
const UNICHARSET& unicharset,
|
||||
const char *label, FILE *output_file) {
|
||||
for (int row = col; row < dim && row - col < ratings.bandwidth(); ++row) {
|
||||
if (ratings.get(col, row) != NOT_CLASSIFIED) {
|
||||
BLOB_CHOICE_IT bc_it(ratings.get(col, row));
|
||||
for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
|
||||
blob_choices[length] = bc_it.data();
|
||||
if (row + 1 < dim) {
|
||||
PrintMatrixPaths(row + 1, dim, ratings, length + 1, blob_choices,
|
||||
unicharset, label, output_file);
|
||||
}
|
||||
else {
|
||||
PrintPath(length + 1, blob_choices, unicharset, label, output_file);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Runs classify_word_pass1() on the current word. Outputs Tesseract's
|
||||
// raw choice as a result of the classification. For words labeled with a
|
||||
// single unichar also outputs all alternatives from blob_choices of the
|
||||
// best choice.
|
||||
void Tesseract::ambigs_classify_and_output(const char *label,
|
||||
PAGE_RES_IT* pr_it,
|
||||
FILE *output_file) {
|
||||
// Classify word.
|
||||
fflush(stdout);
|
||||
WordData word_data(*pr_it);
|
||||
SetupWordPassN(1, &word_data);
|
||||
classify_word_and_language(1, pr_it, &word_data);
|
||||
WERD_RES* werd_res = word_data.word;
|
||||
WERD_CHOICE *best_choice = werd_res->best_choice;
|
||||
ASSERT_HOST(best_choice != NULL);
|
||||
|
||||
// Compute the number of unichars in the label.
|
||||
GenericVector<UNICHAR_ID> encoding;
|
||||
if (!unicharset.encode_string(label, true, &encoding, NULL, NULL)) {
|
||||
tprintf("Not outputting illegal unichar %s\n", label);
|
||||
return;
|
||||
}
|
||||
|
||||
// Dump all paths through the ratings matrix (which is normally small).
|
||||
int dim = werd_res->ratings->dimension();
|
||||
const BLOB_CHOICE** blob_choices = new const BLOB_CHOICE*[dim];
|
||||
PrintMatrixPaths(0, dim, *werd_res->ratings, 0, blob_choices,
|
||||
unicharset, label, output_file);
|
||||
delete[] blob_choices;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
|
@ -0,0 +1,798 @@
|
|||
/**********************************************************************
|
||||
* File: reject.cpp (Formerly reject.c)
|
||||
* Description: Rejection functions used in tessedit
|
||||
* Author: Phil Cheatle
|
||||
* Created: Wed Sep 23 16:50:21 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable:4244) // Conversion warnings
|
||||
#pragma warning(disable:4305) // int/float warnings
|
||||
#endif
|
||||
|
||||
#include "tessvars.h"
|
||||
#ifdef __UNIX__
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#endif
|
||||
#include "scanutils.h"
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include "genericvector.h"
|
||||
#include "reject.h"
|
||||
#include "control.h"
|
||||
#include "docqual.h"
|
||||
#include "globaloc.h" // For err_exit.
|
||||
#include "globals.h"
|
||||
#include "helpers.h"
|
||||
|
||||
#include "tesseractclass.h"
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
CLISTIZEH(STRING) CLISTIZE(STRING)
|
||||
|
||||
/*************************************************************************
|
||||
* set_done()
|
||||
*
|
||||
* Set the done flag based on the word acceptability criteria
|
||||
*************************************************************************/
|
||||
|
||||
namespace tesseract {
|
||||
void Tesseract::set_done(WERD_RES *word, inT16 pass) {
|
||||
word->done = word->tess_accepted &&
|
||||
(strchr(word->best_choice->unichar_string().string(), ' ') == NULL);
|
||||
bool word_is_ambig = word->best_choice->dangerous_ambig_found();
|
||||
bool word_from_dict = word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
|
||||
word->best_choice->permuter() == FREQ_DAWG_PERM ||
|
||||
word->best_choice->permuter() == USER_DAWG_PERM;
|
||||
if (word->done && (pass == 1) && (!word_from_dict || word_is_ambig) &&
|
||||
one_ell_conflict(word, FALSE)) {
|
||||
if (tessedit_rejection_debug) tprintf("one_ell_conflict detected\n");
|
||||
word->done = FALSE;
|
||||
}
|
||||
if (word->done && ((!word_from_dict &&
|
||||
word->best_choice->permuter() != NUMBER_PERM) || word_is_ambig)) {
|
||||
if (tessedit_rejection_debug) tprintf("non-dict or ambig word detected\n");
|
||||
word->done = FALSE;
|
||||
}
|
||||
if (tessedit_rejection_debug) {
|
||||
tprintf("set_done(): done=%d\n", word->done);
|
||||
word->best_choice->print("");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* make_reject_map()
|
||||
*
|
||||
* Sets the done flag to indicate whether the resylt is acceptable.
|
||||
*
|
||||
* Sets a reject map for the word.
|
||||
*************************************************************************/
|
||||
// Build the per-character reject map for a word and set its done flag.
// Runs the 0/O repair first, then dispatches on tessedit_reject_mode:
//   0: reject poor matches only when the word was not accepted.
//   5: reject I/1/l without contextual confirmation, whole unacceptable
//      words, and whole words with a very small x-height.
// Any other mode is a fatal configuration error (err_exit).
void Tesseract::make_reject_map(WERD_RES *word, ROW *row, inT16 pass) {
  int i;
  int offset;

  flip_0O(word);                // Repair 0<->O confusions before mapping.
  check_debug_pt(word, -1);     // For trap only
  set_done(word, pass);         // Set acceptance
  word->reject_map.initialise(word->best_choice->unichar_lengths().length());
  reject_blanks(word);
  /*
  0: Rays original heuristic - the baseline
  */
  if (tessedit_reject_mode == 0) {
    if (!word->done)
      reject_poor_matches(word);
  }
  else if (tessedit_reject_mode == 5) {
    /*
    5: Reject I/1/l from words where there is no strong contextual
    confirmation; the whole of any unacceptable words (incl PERM rej of
    dubious 1/I/ls); and the whole of any words which are very small
    */
    // y_scale() converts the normalised x-height back to image pixels.
    if (kBlnXHeight / word->denorm.y_scale() <= min_sane_x_ht_pixels) {
      word->reject_map.rej_word_small_xht();
    }
    else {
      one_ell_conflict(word, TRUE);
      /*
      Originally the code here just used the done flag. Now I have duplicated
      and unpacked the conditions for setting the done flag so that each
      mechanism can be turned on or off independently. This works WITHOUT
      affecting the done flag setting.
      */
      if (rej_use_tess_accepted && !word->tess_accepted)
        word->reject_map.rej_word_not_tess_accepted();

      if (rej_use_tess_blanks &&
          (strchr(word->best_choice->unichar_string().string(), ' ') != NULL))
        word->reject_map.rej_word_contains_blanks();

      WERD_CHOICE* best_choice = word->best_choice;
      if (rej_use_good_perm) {
        if ((best_choice->permuter() == SYSTEM_DAWG_PERM ||
             best_choice->permuter() == FREQ_DAWG_PERM ||
             best_choice->permuter() == USER_DAWG_PERM) &&
            (!rej_use_sensible_wd ||
             acceptable_word_string(*word->uch_set,
                                    best_choice->unichar_string().string(),
                                    best_choice->unichar_lengths().string()) !=
                AC_UNACCEPTABLE)) {
          // PASSED TEST
        }
        else if (best_choice->permuter() == NUMBER_PERM) {
          if (rej_alphas_in_number_perm) {
            // Reject accepted alphabetic characters inside a numeric word.
            for (i = 0, offset = 0;
                 best_choice->unichar_string()[offset] != '\0';
                 offset += best_choice->unichar_lengths()[i++]) {
              if (word->reject_map[i].accepted() &&
                  word->uch_set->get_isalpha(
                      best_choice->unichar_string().string() + offset,
                      best_choice->unichar_lengths()[i]))
                word->reject_map[i].setrej_bad_permuter();
              // rej alpha
            }
          }
        }
        else {
          word->reject_map.rej_word_bad_permuter();
        }
      }
      /* Ambig word rejection was here once !!*/
    }
  }
  else {
    tprintf("BAD tessedit_reject_mode\n");
    err_exit();
  }

  if (tessedit_image_border > -1)
    reject_edge_blobs(word);

  check_debug_pt(word, 10);
  if (tessedit_rejection_debug) {
    tprintf("Permuter Type = %d\n", word->best_choice->permuter());
    tprintf("Certainty: %f Rating: %f\n",
            word->best_choice->certainty(), word->best_choice->rating());
    tprintf("Dict word: %d\n", dict_word(*(word->best_choice)));
  }

  flip_hyphens(word);
  check_debug_pt(word, 20);
}
|
||||
} // namespace tesseract
|
||||
|
||||
|
||||
// Reject every character position whose best choice is a blank: a blank
// here marks a blob the classifier failed to recognise.
void reject_blanks(WERD_RES *word) {
  const char *text = word->best_choice->unichar_string().string();
  const char *lengths = word->best_choice->unichar_lengths().string();
  inT16 index = 0;
  for (inT16 pos = 0; text[pos] != '\0'; pos += lengths[index], ++index) {
    if (text[pos] == ' ')
      word->reject_map[index].setrej_tess_failure();  // unrecognised blob
  }
}
|
||||
|
||||
namespace tesseract {
|
||||
// Reject every character that belongs to the I/l/1 confusion set
// (conflict_set_I_l_1): with nothing to disambiguate them, each one is
// marked as a 1Il conflict in the reject map.
void Tesseract::reject_I_1_L(WERD_RES *word) {
  const STRING conflict_set(conflict_set_I_l_1);  // hoisted out of the loop
  const char *text = word->best_choice->unichar_string().string();
  const char *lengths = word->best_choice->unichar_lengths().string();
  inT16 index = 0;
  for (inT16 pos = 0; text[pos] != '\0'; pos += lengths[index], ++index) {
    if (conflict_set.contains(text[pos]))
      word->reject_map[index].setrej_1Il_conflict();
  }
}
|
||||
} // namespace tesseract
|
||||
|
||||
|
||||
// Reject characters whose certainty falls below a cutoff derived from the
// word's own certainty distribution (compute_reject_threshold); space
// characters are rejected outright as classifier failures.
void reject_poor_matches(WERD_RES *word) {
  WERD_CHOICE *choice = word->best_choice;
  const float cutoff = compute_reject_threshold(choice);
  const int len = choice->length();
  for (int pos = 0; pos < len; ++pos) {
    if (choice->unichar_id(pos) == UNICHAR_SPACE) {
      word->reject_map[pos].setrej_tess_failure();
    } else if (choice->certainty(pos) < cutoff) {
      word->reject_map[pos].setrej_poor_match();
    }
  }
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* compute_reject_threshold
|
||||
*
|
||||
* Set a rejection threshold for this word.
|
||||
* Initially this is a trivial function which looks for the largest
|
||||
* gap in the certainty value.
|
||||
**********************************************************************/
|
||||
|
||||
// Compute a per-word certainty rejection threshold: the midpoint of the
// largest gap in the sorted per-character certainties (only searched for
// words of 3+ characters). If no gap beats 0, the threshold falls just
// below the worst certainty so that nothing is rejected by it.
// Fixes: removed the unused BLOB_CHOICE_IT local (and its stale "super
// iterator" comment); guarded the empty word, where ratings[0] below
// would read out of bounds.
float compute_reject_threshold(WERD_CHOICE* word) {
  float threshold;       // rejection threshold
  float bestgap = 0.0f;  // biggest gap
  float gapstart;        // bottom of gap

  int blob_count = word->length();
  if (blob_count == 0)
    return 0.0f;  // nothing to threshold; avoid ratings[0] below
  GenericVector<float> ratings;
  ratings.resize_no_init(blob_count);
  for (int i = 0; i < blob_count; ++i) {
    ratings[i] = word->certainty(i);
  }
  ratings.sort();
  gapstart = ratings[0] - 1;  // all reject if none better
  if (blob_count >= 3) {
    for (int index = 0; index < blob_count - 1; index++) {
      if (ratings[index + 1] - ratings[index] > bestgap) {
        bestgap = ratings[index + 1] - ratings[index];
        // find biggest
        gapstart = ratings[index];
      }
    }
  }
  threshold = gapstart + bestgap / 2;

  return threshold;
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* reject_edge_blobs()
|
||||
*
|
||||
* If the word is perilously close to the edge of the image, reject those blobs
|
||||
* in the word which are too close to the edge as they could be clipped.
|
||||
*************************************************************************/
|
||||
namespace tesseract {
|
||||
// Reject blobs that lie within tessedit_image_border pixels of the image
// edge, since such blobs may have been clipped by the page boundary.
void Tesseract::reject_edge_blobs(WERD_RES *word) {
  // Use the box_word as it is already denormed back to image coordinates.
  TBOX bounds = word->word->bounding_box();
  int num_blobs = word->box_word->length();

  // Fast path: the whole word is clear of the border (De Morgan of the
  // per-blob test below applied to the word box).
  if (bounds.left() >= tessedit_image_border &&
      bounds.bottom() >= tessedit_image_border &&
      bounds.right() + tessedit_image_border <= ImageWidth() - 1 &&
      bounds.top() + tessedit_image_border <= ImageHeight() - 1)
    return;

  ASSERT_HOST(word->reject_map.length() == num_blobs);
  for (int b = 0; b < num_blobs; b++) {
    TBOX box = word->box_word->BlobBox(b);
    if (box.left() < tessedit_image_border ||
        box.bottom() < tessedit_image_border ||
        box.right() + tessedit_image_border > ImageWidth() - 1 ||
        box.top() + tessedit_image_border > ImageHeight() - 1) {
      word->reject_map[b].setrej_edge_char();  // Close to edge
    }
  }
}
|
||||
|
||||
/**********************************************************************
|
||||
* one_ell_conflict()
|
||||
*
|
||||
* Identify words where there is a potential I/l/1 error.
|
||||
* - A bundle of contextual heuristics!
|
||||
**********************************************************************/
|
||||
// Detect (and, when update_map is TRUE, record in the reject map) a
// potential I/l/1 confusion in the word. Returns TRUE if a conflict was
// found. NOTE: temporarily edits best_choice's unichar string in place to
// probe dictionary alternatives, always restoring it before returning -
// do not restyle without preserving that restore order.
BOOL8 Tesseract::one_ell_conflict(WERD_RES *word_res, BOOL8 update_map) {
  const char *word;
  const char *lengths;
  inT16 word_len;                // length in unichars
  inT16 first_alphanum_index_;
  inT16 first_alphanum_offset_;
  inT16 i;
  inT16 offset;
  BOOL8 non_conflict_set_char;   // non conflict-set alphanumeric seen?
  BOOL8 conflict = FALSE;
  BOOL8 allow_1s;
  ACCEPTABLE_WERD_TYPE word_type;
  BOOL8 dict_perm_type;
  BOOL8 dict_word_ok;
  int dict_word_type;

  word = word_res->best_choice->unichar_string().string();
  lengths = word_res->best_choice->unichar_lengths().string();
  word_len = strlen(lengths);
  /*
  If there are no occurrences of the conflict set characters then the word
  is OK.
  */
  if (strpbrk(word, conflict_set_I_l_1.string()) == NULL)
    return FALSE;

  /*
  There is a conflict if there are NO other (confirmed) alphanumerics apart
  from those in the conflict set.
  */
  for (i = 0, offset = 0, non_conflict_set_char = FALSE;
       (i < word_len) && !non_conflict_set_char; offset += lengths[i++])
    non_conflict_set_char =
        (word_res->uch_set->get_isalpha(word + offset, lengths[i]) ||
         word_res->uch_set->get_isdigit(word + offset, lengths[i])) &&
        !STRING(conflict_set_I_l_1).contains(word[offset]);
  if (!non_conflict_set_char) {
    if (update_map)
      reject_I_1_L(word_res);
    return TRUE;
  }

  /*
  If the word is accepted by a dawg permuter, and the first alpha character
  is "I" or "l", check to see if the alternative is also a dawg word. If it
  is, then there is a potential error otherwise the word is ok.
  */
  dict_perm_type = (word_res->best_choice->permuter() == SYSTEM_DAWG_PERM) ||
      (word_res->best_choice->permuter() == USER_DAWG_PERM) ||
      (rej_trust_doc_dawg &&
       (word_res->best_choice->permuter() == DOC_DAWG_PERM)) ||
      (word_res->best_choice->permuter() == FREQ_DAWG_PERM);
  dict_word_type = dict_word(*(word_res->best_choice));
  dict_word_ok = (dict_word_type > 0) &&
      (rej_trust_doc_dawg || (dict_word_type != DOC_DAWG_PERM));

  if ((rej_1Il_use_dict_word && dict_word_ok) ||
      (rej_1Il_trust_permuter_type && dict_perm_type) ||
      (dict_perm_type && dict_word_ok)) {
    // The earlier scan guaranteed at least one alphanumeric, so the
    // index/offset below cannot be -1.
    first_alphanum_index_ = first_alphanum_index(word, lengths);
    first_alphanum_offset_ = first_alphanum_offset(word, lengths);
    // Leading 'I': probe whether the 'l' spelling is also a dict word.
    if (lengths[first_alphanum_index_] == 1 &&
        word[first_alphanum_offset_] == 'I') {
      word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
      if (safe_dict_word(word_res) > 0) {
        word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
        if (update_map)
          word_res->reject_map[first_alphanum_index_].
              setrej_1Il_conflict();
        return TRUE;
      }
      else {
        word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
        return FALSE;
      }
    }

    // Leading 'l': symmetric probe with the 'I' spelling.
    if (lengths[first_alphanum_index_] == 1 &&
        word[first_alphanum_offset_] == 'l') {
      word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
      if (safe_dict_word(word_res) > 0) {
        word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
        if (update_map)
          word_res->reject_map[first_alphanum_index_].
              setrej_1Il_conflict();
        return TRUE;
      }
      else {
        word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
        return FALSE;
      }
    }
    return FALSE;
  }

  /*
  NEW 1Il code. The old code relied on permuter types too much. In fact,
  tess will use TOP_CHOICE permute for good things like "palette".
  In this code the string is examined independently to see if it looks like
  a well formed word.
  */

  /*
  REGARDLESS OF PERMUTER, see if flipping a leading I/l generates a
  dictionary word.
  */
  first_alphanum_index_ = first_alphanum_index(word, lengths);
  first_alphanum_offset_ = first_alphanum_offset(word, lengths);
  if (lengths[first_alphanum_index_] == 1 &&
      word[first_alphanum_offset_] == 'l') {
    word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
    if (safe_dict_word(word_res) > 0)
      return FALSE;  // the 'I' spelling is kept: it is a dictionary word
    else
      word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
  }
  else if (lengths[first_alphanum_index_] == 1 &&
           word[first_alphanum_offset_] == 'I') {
    word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
    if (safe_dict_word(word_res) > 0)
      return FALSE;  // the 'l' spelling is kept: it is a dictionary word
    else
      word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
  }
  /*
  For strings containing digits:
  If there are no alphas OR the numeric permuter liked the word,
  reject any non 1 conflict chs
  Else reject all conflict chs
  */
  if (word_contains_non_1_digit(word, lengths)) {
    allow_1s = (alpha_count(word, lengths) == 0) ||
        (word_res->best_choice->permuter() == NUMBER_PERM);

    inT16 offset;  // NOTE(review): shadows the outer offset (harmless here)
    conflict = FALSE;
    for (i = 0, offset = 0; word[offset] != '\0';
         offset += word_res->best_choice->unichar_lengths()[i++]) {
      if ((!allow_1s || (word[offset] != '1')) &&
          STRING(conflict_set_I_l_1).contains(word[offset])) {
        if (update_map)
          word_res->reject_map[i].setrej_1Il_conflict();
        conflict = TRUE;
      }
    }
    return conflict;
  }
  /*
  For anything else. See if it conforms to an acceptable word type. If so,
  treat accordingly.
  */
  word_type = acceptable_word_string(*word_res->uch_set, word, lengths);
  if ((word_type == AC_LOWER_CASE) || (word_type == AC_INITIAL_CAP)) {
    first_alphanum_index_ = first_alphanum_index(word, lengths);
    first_alphanum_offset_ = first_alphanum_offset(word, lengths);
    if (STRING(conflict_set_I_l_1).contains(word[first_alphanum_offset_])) {
      if (update_map)
        word_res->reject_map[first_alphanum_index_].
            setrej_1Il_conflict();
      return TRUE;
    }
    else
      return FALSE;
  }
  else if (word_type == AC_UPPER_CASE) {
    return FALSE;
  }
  else {
    if (update_map)
      reject_I_1_L(word_res);
    return TRUE;
  }
}
|
||||
|
||||
|
||||
// Return the unichar index of the first alphanumeric character in the
// word, or -1 if the word contains none.
inT16 Tesseract::first_alphanum_index(const char *word,
                                      const char *word_lengths) {
  inT16 index = 0;
  inT16 pos = 0;
  while (word[pos] != '\0') {
    if (unicharset.get_isalpha(word + pos, word_lengths[index]) ||
        unicharset.get_isdigit(word + pos, word_lengths[index]))
      return index;
    pos += word_lengths[index];
    ++index;
  }
  return -1;
}
|
||||
|
||||
// Return the byte offset of the first alphanumeric character in the
// word, or -1 if the word contains none.
inT16 Tesseract::first_alphanum_offset(const char *word,
                                       const char *word_lengths) {
  inT16 index = 0;
  inT16 pos = 0;
  while (word[pos] != '\0') {
    if (unicharset.get_isalpha(word + pos, word_lengths[index]) ||
        unicharset.get_isdigit(word + pos, word_lengths[index]))
      return pos;
    pos += word_lengths[index];
    ++index;
  }
  return -1;
}
|
||||
|
||||
// Count the alphabetic (not digit) unichars in the word.
inT16 Tesseract::alpha_count(const char *word,
                             const char *word_lengths) {
  inT16 total = 0;
  inT16 index = 0;
  for (inT16 pos = 0; word[pos] != '\0'; pos += word_lengths[index], ++index) {
    if (unicharset.get_isalpha(word + pos, word_lengths[index]))
      ++total;
  }
  return total;
}
|
||||
|
||||
|
||||
// TRUE if the word contains any digit other than a single-byte '1'.
BOOL8 Tesseract::word_contains_non_1_digit(const char *word,
                                           const char *word_lengths) {
  inT16 index = 0;
  for (inT16 pos = 0; word[pos] != '\0'; pos += word_lengths[index], ++index) {
    if (unicharset.get_isdigit(word + pos, word_lengths[index]) &&
        (word_lengths[index] != 1 || word[pos] != '1'))
      return TRUE;
  }
  return FALSE;
}
|
||||
|
||||
/*************************************************************************
|
||||
* dont_allow_1Il()
|
||||
* Don't unreject LONE accepted 1Il conflict set chars
|
||||
*************************************************************************/
|
||||
// Don't unreject LONE accepted 1/I/l conflict-set characters: if the
// only accepted characters in the word come from the conflict set,
// reject them all again (post-NN 1Il rejection).
void Tesseract::dont_allow_1Il(WERD_RES *word) {
  const int len = word->reject_map.length();
  const char *text = word->best_choice->unichar_string().string();
  const char *lengths = word->best_choice->unichar_lengths().string();
  const STRING conflict_set(conflict_set_I_l_1);
  BOOL8 conflict_char_accepted = FALSE;

  // Pass 1: scan accepted characters. Any accepted alphanumeric outside
  // the conflict set gives the word real support - leave it alone.
  int pos = 0;
  for (int i = 0; i < len; pos += lengths[i++]) {
    if (!word->reject_map[i].accepted())
      continue;
    if (conflict_set.contains(text[pos])) {
      conflict_char_accepted = TRUE;
    } else if (word->uch_set->get_isalpha(text + pos, lengths[i]) ||
               word->uch_set->get_isdigit(text + pos, lengths[i])) {
      return;  // >=1 non 1Il alphanumeric accepted
    }
  }
  if (!conflict_char_accepted)
    return;  // Nothing to worry about.

  // Pass 2: only conflict-set characters were accepted - reject them.
  pos = 0;
  for (int i = 0; i < len; pos += lengths[i++]) {
    if (conflict_set.contains(text[pos]) && word->reject_map[i].accepted())
      word->reject_map[i].setrej_postNN_1Il();
  }
}
|
||||
|
||||
|
||||
// Count the accepted (non-rejected) alphanumeric characters in the word.
inT16 Tesseract::count_alphanums(WERD_RES *word_res) {
  const WERD_CHOICE *choice = word_res->best_choice;
  int total = 0;
  const int len = word_res->reject_map.length();
  for (int i = 0; i < len; ++i) {
    if (!word_res->reject_map[i].accepted())
      continue;
    if (word_res->uch_set->get_isalpha(choice->unichar_id(i)) ||
        word_res->uch_set->get_isdigit(choice->unichar_id(i)))
      ++total;
  }
  return total;
}
|
||||
|
||||
|
||||
// reject all if most rejected.
|
||||
void Tesseract::reject_mostly_rejects(WERD_RES *word) {
|
||||
/* Reject the whole of the word if the fraction of rejects exceeds a limit */
|
||||
|
||||
if ((float)word->reject_map.reject_count() / word->reject_map.length() >=
|
||||
rej_whole_of_mostly_reject_word_fract)
|
||||
word->reject_map.rej_word_mostly_rej();
|
||||
}
|
||||
|
||||
|
||||
// TRUE if the word is a repetition of a single permitted non-alphanumeric
// character (from ok_repeated_ch_non_alphanum_wds) and every repeat is of
// accepted quality.
BOOL8 Tesseract::repeated_nonalphanum_wd(WERD_RES *word, ROW *row) {
  const WERD_CHOICE *choice = word->best_choice;

  // Single-character words do not count as repetitions.
  if (choice->unichar_lengths().length() <= 1)
    return FALSE;

  // The repeated character must be in the permitted set.
  if (!STRING(ok_repeated_ch_non_alphanum_wds).
      contains(choice->unichar_string()[0]))
    return FALSE;

  // Every character must be identical to the first.
  UNICHAR_ID first_id = choice->unichar_id(0);
  for (int i = 1; i < choice->length(); ++i) {
    if (choice->unichar_id(i) != first_id)
      return FALSE;
  }

  // Finally, every character must be of accepted quality.
  inT16 char_quality;
  inT16 accepted_char_quality;
  word_char_quality(word, row, &char_quality, &accepted_char_quality);
  return (choice->unichar_lengths().length() == char_quality &&
          char_quality == accepted_char_quality) ? TRUE : FALSE;
}
|
||||
|
||||
// Like dict_word(), except that a match found only via the document dawg
// is discounted: DOC_DAWG_PERM maps to 0 ("not a dictionary word").
inT16 Tesseract::safe_dict_word(const WERD_RES *werd_res) {
  int perm = werd_res->tesseract->dict_word(*werd_res->best_choice);
  if (perm == DOC_DAWG_PERM)
    return 0;
  return perm;
}
|
||||
|
||||
// Note: After running this function word_res->ratings
|
||||
// might not contain the right BLOB_CHOICE corresponding to each character
|
||||
// in word_res->best_choice.
|
||||
// Note: After running this function word_res->ratings
// might not contain the right BLOB_CHOICE corresponding to each character
// in word_res->best_choice.
//
// Repair hyphen confusions using blob geometry: a '.' whose blob is wide
// and isolated becomes '-' (and is un-rejected); an existing '-' has its
// reject status adjusted by the same aspect-ratio thresholds.
// Disabled unless tessedit_lower_flip_hyphen > 1.
void Tesseract::flip_hyphens(WERD_RES *word_res) {
  WERD_CHOICE *best_choice = word_res->best_choice;
  int i;
  int prev_right = -9999;  // right edge of the previous blob
  int next_left;           // left edge of the following blob
  TBOX out_box;
  float aspect_ratio;

  if (tessedit_lower_flip_hyphen <= 1)
    return;

  int num_blobs = word_res->rebuild_word->NumBlobs();
  UNICHAR_ID unichar_dash = word_res->uch_set->unichar_to_id("-");
  for (i = 0; i < best_choice->length() && i < num_blobs; ++i) {
    TBLOB* blob = word_res->rebuild_word->blobs[i];
    out_box = blob->bounding_box();
    if (i + 1 == num_blobs)
      next_left = 9999;  // last blob: no right neighbour
    else
      next_left = word_res->rebuild_word->blobs[i + 1]->bounding_box().left();
    // Don't touch small or touching blobs - it is too dangerous.
    if ((out_box.width() > 8 * word_res->denorm.x_scale()) &&
        (out_box.left() > prev_right) && (out_box.right() < next_left)) {
      aspect_ratio = out_box.width() / (float)out_box.height();
      if (word_res->uch_set->eq(best_choice->unichar_id(i), ".")) {
        // Wide enough '.' becomes a hyphen if '-' exists and is enabled.
        if (aspect_ratio >= tessedit_upper_flip_hyphen &&
            word_res->uch_set->contains_unichar_id(unichar_dash) &&
            word_res->uch_set->get_enabled(unichar_dash)) {
          /* Certain HYPHEN */
          best_choice->set_unichar_id(unichar_dash, i);
          if (word_res->reject_map[i].rejected())
            word_res->reject_map[i].setrej_hyphen_accept();
        }
        if ((aspect_ratio > tessedit_lower_flip_hyphen) &&
            word_res->reject_map[i].accepted())
          // Suspected HYPHEN
          word_res->reject_map[i].setrej_hyphen();
      }
      else if (best_choice->unichar_id(i) == unichar_dash) {
        if ((aspect_ratio >= tessedit_upper_flip_hyphen) &&
            (word_res->reject_map[i].rejected()))
          word_res->reject_map[i].setrej_hyphen_accept();
        // Certain HYPHEN

        if ((aspect_ratio <= tessedit_lower_flip_hyphen) &&
            (word_res->reject_map[i].accepted()))
          // Suspected HYPHEN
          word_res->reject_map[i].setrej_hyphen();
      }
    }
    prev_right = out_box.right();
  }
}
|
||||
|
||||
// Note: After running this function word_res->ratings
|
||||
// might not contain the right BLOB_CHOICE corresponding to each character
|
||||
// in word_res->best_choice.
|
||||
// Note: After running this function word_res->ratings
// might not contain the right BLOB_CHOICE corresponding to each character
// in word_res->best_choice.
//
// Contextual 0<->O repair: rewrites zeros surrounded by uppercase letters
// as 'O', and O's surrounded by digits as '0', using the pattern rules
// commented inline (/* A0A */ etc., where A = non-O uppercase and
// 9 = non-0 digit). Bails out on words with sub/superscripts, and when
// '0' or 'O' is absent/disabled in the unicharset. The loop index i is
// advanced inside several pattern bodies to skip characters already fixed.
void Tesseract::flip_0O(WERD_RES *word_res) {
  WERD_CHOICE *best_choice = word_res->best_choice;
  int i;
  TBOX out_box;

  if (!tessedit_flip_0O)
    return;

  int num_blobs = word_res->rebuild_word->NumBlobs();
  for (i = 0; i < best_choice->length() && i < num_blobs; ++i) {
    TBLOB* blob = word_res->rebuild_word->blobs[i];
    if (word_res->uch_set->get_isupper(best_choice->unichar_id(i)) ||
        word_res->uch_set->get_isdigit(best_choice->unichar_id(i))) {
      out_box = blob->bounding_box();
      if ((out_box.top() < kBlnBaselineOffset + kBlnXHeight) ||
          (out_box.bottom() > kBlnBaselineOffset + kBlnXHeight / 4))
        return;  // Beware words with sub/superscripts
    }
  }
  UNICHAR_ID unichar_0 = word_res->uch_set->unichar_to_id("0");
  UNICHAR_ID unichar_O = word_res->uch_set->unichar_to_id("O");
  if (unichar_0 == INVALID_UNICHAR_ID ||
      !word_res->uch_set->get_enabled(unichar_0) ||
      unichar_O == INVALID_UNICHAR_ID ||
      !word_res->uch_set->get_enabled(unichar_O)) {
    return;  // 0 or O are not present/enabled in unicharset
  }
  // Start at 1: every rule below inspects the character at i - 1.
  for (i = 1; i < best_choice->length(); ++i) {
    if (best_choice->unichar_id(i) == unichar_0 ||
        best_choice->unichar_id(i) == unichar_O) {
      /* A0A */
      if ((i + 1) < best_choice->length() &&
          non_O_upper(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
          non_O_upper(*word_res->uch_set, best_choice->unichar_id(i + 1))) {
        best_choice->set_unichar_id(unichar_O, i);
      }
      /* A00A */
      if (non_O_upper(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
          (i + 1) < best_choice->length() &&
          (best_choice->unichar_id(i + 1) == unichar_0 ||
           best_choice->unichar_id(i + 1) == unichar_O) &&
          (i + 2) < best_choice->length() &&
          non_O_upper(*word_res->uch_set, best_choice->unichar_id(i + 2))) {
        best_choice->set_unichar_id(unichar_O, i);
        i++;
      }
      /* AA0<non digit or end of word> */
      if ((i > 1) &&
          non_O_upper(*word_res->uch_set, best_choice->unichar_id(i - 2)) &&
          non_O_upper(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
          (((i + 1) < best_choice->length() &&
            !word_res->uch_set->get_isdigit(best_choice->unichar_id(i + 1)) &&
            !word_res->uch_set->eq(best_choice->unichar_id(i + 1), "l") &&
            !word_res->uch_set->eq(best_choice->unichar_id(i + 1), "I")) ||
           (i == best_choice->length() - 1))) {
        best_choice->set_unichar_id(unichar_O, i);
      }
      /* 9O9 */
      if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
          (i + 1) < best_choice->length() &&
          non_0_digit(*word_res->uch_set, best_choice->unichar_id(i + 1))) {
        best_choice->set_unichar_id(unichar_0, i);
      }
      /* 9OOO */
      if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
          (i + 2) < best_choice->length() &&
          (best_choice->unichar_id(i + 1) == unichar_0 ||
           best_choice->unichar_id(i + 1) == unichar_O) &&
          (best_choice->unichar_id(i + 2) == unichar_0 ||
           best_choice->unichar_id(i + 2) == unichar_O)) {
        best_choice->set_unichar_id(unichar_0, i);
        best_choice->set_unichar_id(unichar_0, i + 1);
        best_choice->set_unichar_id(unichar_0, i + 2);
        i += 2;
      }
      /* 9OO<non upper> */
      if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
          (i + 2) < best_choice->length() &&
          (best_choice->unichar_id(i + 1) == unichar_0 ||
           best_choice->unichar_id(i + 1) == unichar_O) &&
          !word_res->uch_set->get_isupper(best_choice->unichar_id(i + 2))) {
        best_choice->set_unichar_id(unichar_0, i);
        best_choice->set_unichar_id(unichar_0, i + 1);
        i++;
      }
      /* 9O<non upper> */
      if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
          (i + 1) < best_choice->length() &&
          !word_res->uch_set->get_isupper(best_choice->unichar_id(i + 1))) {
        best_choice->set_unichar_id(unichar_0, i);
      }
      /* 9[.,]OOO.. */
      if ((i > 1) &&
          (word_res->uch_set->eq(best_choice->unichar_id(i - 1), ".") ||
           word_res->uch_set->eq(best_choice->unichar_id(i - 1), ",")) &&
          (word_res->uch_set->get_isdigit(best_choice->unichar_id(i - 2)) ||
           best_choice->unichar_id(i - 2) == unichar_O)) {
        if (best_choice->unichar_id(i - 2) == unichar_O) {
          best_choice->set_unichar_id(unichar_0, i - 2);
        }
        // Flip the whole run of 0/O after the separator to zeros.
        while (i < best_choice->length() &&
               (best_choice->unichar_id(i) == unichar_O ||
                best_choice->unichar_id(i) == unichar_0)) {
          best_choice->set_unichar_id(unichar_0, i);
          i++;
        }
        i--;  // compensate for the loop's ++i
      }
    }
  }
}
|
||||
|
||||
// TRUE for an uppercase character other than 'O'.
BOOL8 Tesseract::non_O_upper(const UNICHARSET& ch_set, UNICHAR_ID unichar_id) {
  if (!ch_set.get_isupper(unichar_id))
    return FALSE;
  return ch_set.eq(unichar_id, "O") ? FALSE : TRUE;
}
|
||||
|
||||
// TRUE for a digit other than '0'.
BOOL8 Tesseract::non_0_digit(const UNICHARSET& ch_set, UNICHAR_ID unichar_id) {
  if (!ch_set.get_isdigit(unichar_id))
    return FALSE;
  return ch_set.eq(unichar_id, "0") ? FALSE : TRUE;
}
|
||||
} // namespace tesseract
|
|
@ -0,0 +1,34 @@
|
|||
/**********************************************************************
|
||||
* File: reject.h (Formerly reject.h)
|
||||
* Description: Rejection functions used in tessedit
|
||||
* Author: Phil Cheatle
|
||||
* Created: Wed Sep 23 16:50:21 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef REJECT_H
#define REJECT_H

#include "params.h"
#include "pageres.h"

// Free helper functions of the rejection pass (defined in reject.cpp).
void reject_blanks(WERD_RES *word);
void reject_poor_matches(WERD_RES *word);
// Certainty cutoff derived from the largest gap in the word's sorted
// per-character certainties.
float compute_reject_threshold(WERD_CHOICE* word);
// NOTE(review): the declarations below do not match the definitions in
// reject.cpp, where these are Tesseract member functions (and non_0_digit
// there takes a UNICHARSET& and a UNICHAR_ID, not a char*/int). They look
// stale - verify whether anything still links against these signatures.
BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths);
void dont_allow_1Il(WERD_RES *word);
void flip_hyphens(WERD_RES *word);
void flip_0O(WERD_RES *word);
BOOL8 non_0_digit(const char* str, int length);
#endif
|
|
@ -0,0 +1,683 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: resultiterator.cpp
|
||||
// Description: Iterator for tesseract results that is capable of
|
||||
// iterating in proper reading order over Bi Directional
|
||||
// (e.g. mixed Hebrew and English) text.
|
||||
// Author: David Eger
|
||||
// Created: Fri May 27 13:58:06 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "resultiterator.h"
|
||||
|
||||
#include "allheaders.h"
|
||||
#include "pageres.h"
|
||||
#include "strngs.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "unicharset.h"
|
||||
#include "unicodes.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Construct a reading-order iterator positioned where the given LTR
// iterator stands: initialises the bidi run state, picks up the
// preserve_interword_spaces parameter (global or per-instance) if set,
// caches the paragraph direction, and moves to the logical (reading-order)
// start of the current text line.
ResultIterator::ResultIterator(const LTRResultIterator &resit)
    : LTRResultIterator(resit) {
  in_minor_direction_ = false;
  at_beginning_of_minor_run_ = false;
  preserve_interword_spaces_ = false;

  // Look the parameter up in both the global and instance param lists;
  // leave the default (false) when it is not declared.
  BoolParam *p = ParamUtils::FindParam<BoolParam>(
      "preserve_interword_spaces", GlobalParams()->bool_params,
      tesseract_->params()->bool_params);
  if (p != NULL) preserve_interword_spaces_ = (bool)(*p);

  current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
  MoveToLogicalStartOfTextline();
}
|
||||
|
||||
// Factory: build a ResultIterator from an LTR iterator. The result is
// heap-allocated; the caller is responsible for deleting it.
ResultIterator *ResultIterator::StartOfParagraph(
    const LTRResultIterator &resit) {
  return new ResultIterator(resit);
}
|
||||
|
||||
// Return the cached direction of the current paragraph, as computed by
// CurrentParagraphIsLtr() when the iterator entered it.
bool ResultIterator::ParagraphIsLtr() const {
  return current_paragraph_is_ltr_;
}
|
||||
|
||||
// Decide whether the current paragraph reads left-to-right, by scanning a
// copy of the iterator from the paragraph start (this iterator is not
// moved). Ties on the first line fall back to a word-direction majority
// vote over the rest of the paragraph; a full tie counts as LTR.
bool ResultIterator::CurrentParagraphIsLtr() const {
  if (!it_->word())
    return true;  // doesn't matter.
  LTRResultIterator it(*this);
  it.RestartParagraph();
  // Try to figure out the ltr-ness of the paragraph. The rules below
  // make more sense in the context of a difficult paragraph example.
  // Here we denote {ltr characters, RTL CHARACTERS}:
  //
  // "don't go in there!" DAIS EH
  // EHT OTNI DEPMUJ FELSMIH NEHT DNA
  // .GNIDLIUB GNINRUB
  //
  // On the first line, the left-most word is LTR and the rightmost word
  // is RTL. Thus, we are better off taking the majority direction for
  // the whole paragraph contents. So instead of "the leftmost word is LTR"
  // indicating an LTR paragraph, we use a heuristic about what RTL paragraphs
  // would not do: Typically an RTL paragraph would *not* start with an LTR
  // word. So our heuristics are as follows:
  //
  // (1) If the first text line has an RTL word in the left-most position
  //     it is RTL.
  // (2) If the first text line has an LTR word in the right-most position
  //     it is LTR.
  // (3) If neither of the above is true, take the majority count for the
  //     paragraph -- if there are more rtl words, it is RTL. If there
  //     are more LTR words, it's LTR.
  bool leftmost_rtl = it.WordDirection() == DIR_RIGHT_TO_LEFT;
  bool rightmost_ltr = it.WordDirection() == DIR_LEFT_TO_RIGHT;
  int num_ltr, num_rtl;
  num_rtl = leftmost_rtl ? 1 : 0;
  num_ltr = (it.WordDirection() == DIR_LEFT_TO_RIGHT) ? 1 : 0;
  // Walk the remainder of the first text line, tallying directions;
  // rightmost_ltr ends up describing the line's last word.
  for (it.Next(RIL_WORD);
       !it.Empty(RIL_WORD) && !it.IsAtBeginningOf(RIL_TEXTLINE);
       it.Next(RIL_WORD)) {
    StrongScriptDirection dir = it.WordDirection();
    rightmost_ltr = (dir == DIR_LEFT_TO_RIGHT);
    num_rtl += (dir == DIR_RIGHT_TO_LEFT) ? 1 : 0;
    num_ltr += rightmost_ltr ? 1 : 0;
  }
  if (leftmost_rtl)
    return false;
  if (rightmost_ltr)
    return true;
  // First line is ambiguous. Take statistics on the whole paragraph.
  if (!it.Empty(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA)) do {
    StrongScriptDirection dir = it.WordDirection();
    num_rtl += (dir == DIR_RIGHT_TO_LEFT) ? 1 : 0;
    num_ltr += (dir == DIR_LEFT_TO_RIGHT) ? 1 : 0;
  } while (it.Next(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA));
  return num_ltr >= num_rtl;
}
|
||||
|
||||
// Negative meta-mark values interleaved with non-negative word indices by
// CalculateTextlineOrder(); see resultiterator.h for their full meaning.
const int ResultIterator::kMinorRunStart = -1;
const int ResultIterator::kMinorRunEnd = -2;
const int ResultIterator::kComplexWord = -3;
|
||||
|
||||
// Computes the logical (reading-order) sequence of blob indices for the
// current word into *blob_indices.  For LTR contexts this is the identity
// order; for RTL contexts it applies a simplified BiDi resolution
// (roughly modeled on UAX #9) over the word's symbol directions.
void ResultIterator::CalculateBlobOrder(
    GenericVector<int> *blob_indices) const {
  bool context_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_;
  blob_indices->clear();
  if (Empty(RIL_WORD)) return;
  if (context_is_ltr || it_->word()->UnicharsInReadingOrder()) {
    // Easy! just return the blobs in order;
    for (int i = 0; i < word_length_; i++)
      blob_indices->push_back(i);
    return;
  }

  // The blobs are in left-to-right order, but the current reading context
  // is right-to-left.
  const int U_LTR = UNICHARSET::U_LEFT_TO_RIGHT;
  const int U_RTL = UNICHARSET::U_RIGHT_TO_LEFT;
  const int U_EURO_NUM = UNICHARSET::U_EUROPEAN_NUMBER;
  const int U_EURO_NUM_SEP = UNICHARSET::U_EUROPEAN_NUMBER_SEPARATOR;
  const int U_EURO_NUM_TERM = UNICHARSET::U_EUROPEAN_NUMBER_TERMINATOR;
  const int U_COMMON_NUM_SEP = UNICHARSET::U_COMMON_NUMBER_SEPARATOR;
  const int U_OTHER_NEUTRAL = UNICHARSET::U_OTHER_NEUTRAL;

  // Step 1: Scan for and mark European Number sequences
  //   [:ET:]*[:EN:]+(([:ES:]|[:CS:])?[:EN:]+)*[:ET:]*
  GenericVector<int> letter_types;
  for (int i = 0; i < word_length_; i++) {
    letter_types.push_back(it_->word()->SymbolDirection(i));
  }
  // Convert a single separator sandwiched between two EN's into an EN.
  for (int i = 0; i + 2 < word_length_; i++) {
    if (letter_types[i] == U_EURO_NUM && letter_types[i + 2] == U_EURO_NUM &&
        (letter_types[i + 1] == U_EURO_NUM_SEP ||
         letter_types[i + 1] == U_COMMON_NUM_SEP)) {
      letter_types[i + 1] = U_EURO_NUM;
    }
  }
  // Scan for sequences of European Number Terminators around ENs and convert
  // them to ENs.
  for (int i = 0; i < word_length_; i++) {
    if (letter_types[i] == U_EURO_NUM_TERM) {
      int j = i + 1;
      while (j < word_length_ && letter_types[j] == U_EURO_NUM_TERM) { j++; }
      if (j < word_length_ && letter_types[j] == U_EURO_NUM) {
        // The sequence [i..j] should be converted to all European Numbers.
        for (int k = i; k < j; k++) letter_types[k] = U_EURO_NUM;
      }
      j = i - 1;
      while (j > -1 && letter_types[j] == U_EURO_NUM_TERM) { j--; }
      if (j > -1 && letter_types[j] == U_EURO_NUM) {
        // The sequence [j..i] should be converted to all European Numbers.
        for (int k = j; k <= i; k++) letter_types[k] = U_EURO_NUM;
      }
    }
  }
  // Step 2: Convert all remaining types to either L or R.
  // Sequences ([:L:]|[:EN:])+ (([:CS:]|[:ON:])+ ([:L:]|[:EN:])+)* -> L.
  // All other are R.
  for (int i = 0; i < word_length_;) {
    int ti = letter_types[i];
    if (ti == U_LTR || ti == U_EURO_NUM) {
      // Left to right sequence; scan to the end of it.
      int last_good = i;
      for (int j = i + 1; j < word_length_; j++) {
        int tj = letter_types[j];
        if (tj == U_LTR || tj == U_EURO_NUM) {
          last_good = j;
        }
        else if (tj == U_COMMON_NUM_SEP || tj == U_OTHER_NEUTRAL) {
          // do nothing.
        }
        else {
          break;
        }
      }
      // [i..last_good] is the L sequence
      for (int k = i; k <= last_good; k++) letter_types[k] = U_LTR;
      i = last_good + 1;
    }
    else {
      letter_types[i] = U_RTL;
      i++;
    }
  }

  // At this point, letter_types is entirely U_LTR or U_RTL.
  // Emit RTL runs right-to-left; embedded LTR runs keep their internal order.
  for (int i = word_length_ - 1; i >= 0;) {
    if (letter_types[i] == U_RTL) {
      blob_indices->push_back(i);
      i--;
    }
    else {
      // left to right sequence.  scan to the beginning.
      int j = i - 1;
      for (; j >= 0 && letter_types[j] != U_RTL; j--) {}  // pass
      // Now (j, i] is LTR
      for (int k = j + 1; k <= i; k++) blob_indices->push_back(k);
      i = j;
    }
  }
  // Every blob must appear exactly once in the output.
  ASSERT_HOST(blob_indices->size() == word_length_);
}
|
||||
|
||||
static void PrintScriptDirs(const GenericVector<StrongScriptDirection> &dirs) {
|
||||
for (int i = 0; i < dirs.size(); i++) {
|
||||
switch (dirs[i]) {
|
||||
case DIR_NEUTRAL: tprintf("N "); break;
|
||||
case DIR_LEFT_TO_RIGHT: tprintf("L "); break;
|
||||
case DIR_RIGHT_TO_LEFT: tprintf("R "); break;
|
||||
case DIR_MIX: tprintf("Z "); break;
|
||||
default: tprintf("? "); break;
|
||||
}
|
||||
}
|
||||
tprintf("\n");
|
||||
}
|
||||
|
||||
// Convenience overload: computes the textline reading order into
// *word_indices, discarding the per-word direction vector.
void ResultIterator::CalculateTextlineOrder(
    bool paragraph_is_ltr,
    const LTRResultIterator &resit,
    GenericVectorEqEq<int> *word_indices) const {
  GenericVector<StrongScriptDirection> directions;
  CalculateTextlineOrder(paragraph_is_ltr, resit, &directions, word_indices);
}
|
||||
|
||||
// Computes the reading order of the words on resit's text line into
// *word_indices.  If dirs_arg is non-NULL it also receives the strict
// left-to-right sequence of word directions for the line.
void ResultIterator::CalculateTextlineOrder(
    bool paragraph_is_ltr,
    const LTRResultIterator &resit,
    GenericVector<StrongScriptDirection> *dirs_arg,
    GenericVectorEqEq<int> *word_indices) const {
  GenericVector<StrongScriptDirection> dirs;
  GenericVector<StrongScriptDirection> *directions;
  // Use the caller's vector when supplied, else a local scratch vector.
  directions = (dirs_arg != NULL) ? dirs_arg : &dirs;
  directions->truncate(0);

  // A LTRResultIterator goes strictly left-to-right word order.
  LTRResultIterator ltr_it(resit);
  ltr_it.RestartRow();
  if (ltr_it.Empty(RIL_WORD)) return;
  do {
    directions->push_back(ltr_it.WordDirection());
  } while (ltr_it.Next(RIL_WORD) && !ltr_it.IsAtBeginningOf(RIL_TEXTLINE));

  word_indices->truncate(0);
  CalculateTextlineOrder(paragraph_is_ltr, *directions, word_indices);
}
|
||||
|
||||
// Core reading-order computation: given the left-to-right word directions
// on a line and the paragraph direction, fills *reading_order with word
// indices in logical reading order, bracketing reversed (minor-direction)
// runs with kMinorRunStart/kMinorRunEnd and flagging mixed-direction
// words with kComplexWord.  See resultiterator.h for examples.
void ResultIterator::CalculateTextlineOrder(
    bool paragraph_is_ltr,
    const GenericVector<StrongScriptDirection> &word_dirs,
    GenericVectorEqEq<int> *reading_order) {
  reading_order->truncate(0);
  if (word_dirs.size() == 0) return;

  // Take all of the runs of minor direction words and insert them
  // in reverse order.
  int minor_direction, major_direction, major_step, start, end;
  if (paragraph_is_ltr) {
    start = 0;
    end = word_dirs.size();
    major_step = 1;
    major_direction = DIR_LEFT_TO_RIGHT;
    minor_direction = DIR_RIGHT_TO_LEFT;
  }
  else {
    // RTL paragraph: scan the words from right to left.
    start = word_dirs.size() - 1;
    end = -1;
    major_step = -1;
    major_direction = DIR_RIGHT_TO_LEFT;
    minor_direction = DIR_LEFT_TO_RIGHT;
    // Special rule: if there are neutral words at the right most side
    // of a line adjacent to a left-to-right word in the middle of the
    // line, we interpret the end of the line as a single LTR sequence.
    if (word_dirs[start] == DIR_NEUTRAL) {
      int neutral_end = start;
      while (neutral_end > 0 && word_dirs[neutral_end] == DIR_NEUTRAL) {
        neutral_end--;
      }
      if (neutral_end >= 0 && word_dirs[neutral_end] == DIR_LEFT_TO_RIGHT) {
        // LTR followed by neutrals.
        // Scan for the beginning of the minor left-to-right run.
        int left = neutral_end;
        for (int i = left; i >= 0 && word_dirs[i] != DIR_RIGHT_TO_LEFT; i--) {
          if (word_dirs[i] == DIR_LEFT_TO_RIGHT) left = i;
        }
        reading_order->push_back(kMinorRunStart);
        for (int i = left; i < word_dirs.size(); i++) {
          reading_order->push_back(i);
          if (word_dirs[i] == DIR_MIX) reading_order->push_back(kComplexWord);
        }
        reading_order->push_back(kMinorRunEnd);
        // Resume the major-direction scan just left of the emitted run.
        start = left - 1;
      }
    }
  }
  for (int i = start; i != end;) {
    if (word_dirs[i] == minor_direction) {
      // Find the full extent [j..i] of this minor-direction run, allowing
      // embedded neutrals, then emit it reversed between the run markers.
      int j = i;
      while (j != end && word_dirs[j] != major_direction)
        j += major_step;
      if (j == end) j -= major_step;
      while (j != i && word_dirs[j] != minor_direction)
        j -= major_step;
      // [j..i] is a minor direction run.
      reading_order->push_back(kMinorRunStart);
      for (int k = j; k != i; k -= major_step) {
        reading_order->push_back(k);
      }
      reading_order->push_back(i);
      reading_order->push_back(kMinorRunEnd);
      i = j + major_step;
    }
    else {
      reading_order->push_back(i);
      if (word_dirs[i] == DIR_MIX) reading_order->push_back(kComplexWord);
      i += major_step;
    }
  }
}
|
||||
|
||||
// Returns the 0-based index of the current word, counting strictly
// left-to-right from the start of the row.
int ResultIterator::LTRWordIndex() const {
  int this_word_index = 0;
  LTRResultIterator textline(*this);
  textline.RestartRow();
  while (!textline.PositionedAtSameWord(it_)) {
    this_word_index++;
    textline.Next(RIL_WORD);
  }
  return this_word_index;
}
|
||||
|
||||
// Repositions the iterator at the first symbol of the current word in
// reading order (for an RTL context this may be the rightmost blob).
// Precondition: current_paragraph_is_ltr_ and in_minor_direction_ are set.
void ResultIterator::MoveToLogicalStartOfWord() {
  if (word_length_ == 0) {
    BeginWord(0);
    return;
  }
  GenericVector<int> blob_order;
  CalculateBlobOrder(&blob_order);
  // Already at the logical start if the first logical blob is blob 0.
  if (blob_order.size() == 0 || blob_order[0] == 0) return;
  BeginWord(blob_order[0]);
}
|
||||
|
||||
bool ResultIterator::IsAtFinalSymbolOfWord() const {
|
||||
if (!it_->word()) return true;
|
||||
GenericVector<int> blob_order;
|
||||
CalculateBlobOrder(&blob_order);
|
||||
return blob_order.size() == 0 || blob_order.back() == blob_index_;
|
||||
}
|
||||
|
||||
bool ResultIterator::IsAtFirstSymbolOfWord() const {
|
||||
if (!it_->word()) return true;
|
||||
GenericVector<int> blob_order;
|
||||
CalculateBlobOrder(&blob_order);
|
||||
return blob_order.size() == 0 || blob_order[0] == blob_index_;
|
||||
}
|
||||
|
||||
// Appends any Unicode BiDi control marks (LRM/RLM) that should follow the
// current word when printed, based on the meta-marks the word-ordering
// algorithm placed after this word in the textline order.
void ResultIterator::AppendSuffixMarks(STRING *text) const {
  if (!it_->word()) return;
  bool reading_direction_is_ltr =
      current_paragraph_is_ltr_ ^ in_minor_direction_;
  // scan forward to see what meta-information the word ordering algorithm
  // left us.
  // If this word is at the *end* of a minor run, insert the other
  // direction's mark; else if this was a complex word, insert the
  // current reading order's mark.
  GenericVectorEqEq<int> textline_order;
  CalculateTextlineOrder(current_paragraph_is_ltr_,
                         *this, &textline_order);
  int this_word_index = LTRWordIndex();
  int i = textline_order.get_index(this_word_index);
  if (i < 0) return;

  // Collect the last meta-mark (negative entry) directly after this word.
  int last_non_word_mark = 0;
  for (i++; i < textline_order.size() && textline_order[i] < 0; i++) {
    last_non_word_mark = textline_order[i];
  }
  if (last_non_word_mark == kComplexWord) {
    *text += reading_direction_is_ltr ? kLRM : kRLM;
  }
  else if (last_non_word_mark == kMinorRunEnd) {
    // Returning to the paragraph's major direction.
    if (current_paragraph_is_ltr_) {
      *text += kLRM;
    }
    else {
      *text += kRLM;
    }
  }
}
|
||||
|
||||
// Repositions the iterator at the first word of the current text line in
// reading order, updating in_minor_direction_ / at_beginning_of_minor_run_
// from the leading meta-marks.  Precondition: current_paragraph_is_ltr_
// is set.
void ResultIterator::MoveToLogicalStartOfTextline() {
  GenericVectorEqEq<int> word_indices;
  RestartRow();
  CalculateTextlineOrder(current_paragraph_is_ltr_,
                         dynamic_cast<const LTRResultIterator&>(*this),
                         &word_indices);
  // Consume leading meta-marks to learn whether the first word starts a
  // minor-direction run.
  int i = 0;
  for (; i < word_indices.size() && word_indices[i] < 0; i++) {
    if (word_indices[i] == kMinorRunStart) in_minor_direction_ = true;
    else if (word_indices[i] == kMinorRunEnd) in_minor_direction_ = false;
  }
  if (in_minor_direction_) at_beginning_of_minor_run_ = true;
  if (i >= word_indices.size()) return;
  // Step left-to-right to the word with the first logical index.
  int first_word_index = word_indices[i];
  for (int j = 0; j < first_word_index; j++) {
    PageIterator::Next(RIL_WORD);
  }
  MoveToLogicalStartOfWord();
}
|
||||
|
||||
// Moves the iterator to the logical start of the page to begin an
// iteration, resetting the cached BiDi state.
void ResultIterator::Begin() {
  LTRResultIterator::Begin();
  current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
  in_minor_direction_ = false;
  at_beginning_of_minor_run_ = false;
  MoveToLogicalStartOfTextline();
}
|
||||
|
||||
// Advances to the next object at the given level in logical reading order.
// Returns false when the end of the page is reached.  RIL_SYMBOL
// deliberately falls through to RIL_WORD when it exhausts the current
// word's blobs.
bool ResultIterator::Next(PageIteratorLevel level) {
  if (it_->block() == NULL) return false;  // already at end!
  switch (level) {
    case RIL_BLOCK:  // explicit fall-through
    case RIL_PARA:   // explicit fall-through
    case RIL_TEXTLINE:
      if (!PageIterator::Next(level)) return false;
      if (IsWithinFirstTextlineOfParagraph()) {
        // if we've advanced to a new paragraph,
        // recalculate current_paragraph_is_ltr_
        current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
      }
      in_minor_direction_ = false;
      MoveToLogicalStartOfTextline();
      return it_->block() != NULL;
    case RIL_SYMBOL:
    {
      GenericVector<int> blob_order;
      CalculateBlobOrder(&blob_order);
      // Locate the current blob in reading order, then step one past it.
      int next_blob = 0;
      while (next_blob < blob_order.size() &&
             blob_index_ != blob_order[next_blob])
        next_blob++;
      next_blob++;
      if (next_blob < blob_order.size()) {
        // we're in the same word; simply advance one blob.
        BeginWord(blob_order[next_blob]);
        at_beginning_of_minor_run_ = false;
        return true;
      }
      level = RIL_WORD;  // we've fallen through to the next word.
    }
    case RIL_WORD:  // explicit fall-through.
    {
      if (it_->word() == NULL) return Next(RIL_BLOCK);
      GenericVectorEqEq<int> word_indices;
      int this_word_index = LTRWordIndex();
      CalculateTextlineOrder(current_paragraph_is_ltr_,
                             *this,
                             &word_indices);
      // Index of the last real (non-meta-mark) entry in the order.
      int final_real_index = word_indices.size() - 1;
      while (final_real_index > 0 && word_indices[final_real_index] < 0)
        final_real_index--;
      for (int i = 0; i < final_real_index; i++) {
        if (word_indices[i] == this_word_index) {
          // Skip any meta-marks between this word and the next, tracking
          // whether we enter or leave a minor-direction run.
          int j = i + 1;
          for (; j < final_real_index && word_indices[j] < 0; j++) {
            if (word_indices[j] == kMinorRunStart) in_minor_direction_ = true;
            if (word_indices[j] == kMinorRunEnd) in_minor_direction_ = false;
          }
          at_beginning_of_minor_run_ = (word_indices[j - 1] == kMinorRunStart);
          // awesome, we move to word_indices[j]
          if (BidiDebug(3)) {
            tprintf("Next(RIL_WORD): %d -> %d\n",
                    this_word_index, word_indices[j]);
          }
          PageIterator::RestartRow();
          for (int k = 0; k < word_indices[j]; k++) {
            PageIterator::Next(RIL_WORD);
          }
          MoveToLogicalStartOfWord();
          return true;
        }
      }
      if (BidiDebug(3)) {
        tprintf("Next(RIL_WORD): %d -> EOL\n", this_word_index);
      }
      // we're going off the end of the text line.
      return Next(RIL_TEXTLINE);
    }
  }
  ASSERT_HOST(false);  // shouldn't happen.
  return false;
}
|
||||
|
||||
// Returns whether the iterator is at the *logical* beginning of the given
// level (reading order), as opposed to PageIterator's left-to-right
// top-to-bottom order.
bool ResultIterator::IsAtBeginningOf(PageIteratorLevel level) const {
  if (it_->block() == NULL) return false;  // Already at the end!
  if (it_->word() == NULL) return true;  // In an image block.
  if (level == RIL_SYMBOL) return true;  // Always at beginning of a symbol.

  bool at_word_start = IsAtFirstSymbolOfWord();
  if (level == RIL_WORD) return at_word_start;

  ResultIterator line_start(*this);
  // move to the first word in the line...
  line_start.MoveToLogicalStartOfTextline();

  bool at_textline_start = at_word_start && *line_start.it_ == *it_;
  if (level == RIL_TEXTLINE) return at_textline_start;

  // now we move to the left-most word...
  line_start.RestartRow();
  bool at_block_start = at_textline_start &&
      line_start.it_->block() != line_start.it_->prev_block();
  if (level == RIL_BLOCK) return at_block_start;

  // A paragraph starts at a block boundary or where the row's paragraph
  // pointer differs from the previous row's.
  bool at_para_start = at_block_start ||
      (at_textline_start &&
       line_start.it_->row()->row->para() !=
           line_start.it_->prev_row()->row->para());
  if (level == RIL_PARA) return at_para_start;

  ASSERT_HOST(false);  // shouldn't happen.
  return false;
}
|
||||
|
||||
/**
 * NOTE! This is an exact copy of PageIterator::IsAtFinalElement with the
 * change that the variable next is now a ResultIterator instead of a
 * PageIterator, so stepping respects BiDi reading order.
 */
bool ResultIterator::IsAtFinalElement(PageIteratorLevel level,
                                      PageIteratorLevel element) const {
  if (Empty(element)) return true;  // Already at the end!
  // The result is true if we step forward by element and find we are
  // at the the end of the page or at beginning of *all* levels in:
  // [level, element).
  // When there is more than one level difference between element and level,
  // we could for instance move forward one symbol and still be at the first
  // word on a line, so we also have to be at the first symbol in a word.
  ResultIterator next(*this);
  next.Next(element);
  if (next.Empty(element)) return true;  // Reached the end of the page.
  while (element > level) {
    element = static_cast<PageIteratorLevel>(element - 1);
    if (!next.IsAtBeginningOf(element))
      return false;
  }
  return true;
}
|
||||
|
||||
/**
 * Returns the null terminated UTF-8 encoded text string for the current
 * object at the given level, in logical reading order, with Unicode BiDi
 * marks (LRM/RLM) inserted where needed.  Use delete [] to free after use.
 */
char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
  if (it_->word() == NULL) return NULL;  // Already at the end!
  STRING text;
  switch (level) {
    case RIL_BLOCK:
    {
      // Accumulate every paragraph belonging to the current block.
      ResultIterator pp(*this);
      do {
        pp.AppendUTF8ParagraphText(&text);
      } while (pp.Next(RIL_PARA) && pp.it_->block() == it_->block());
    }
      break;
    case RIL_PARA:
      AppendUTF8ParagraphText(&text);
      break;
    case RIL_TEXTLINE:
    {
      ResultIterator it(*this);
      it.MoveToLogicalStartOfTextline();
      it.IterateAndAppendUTF8TextlineText(&text);
    }
      break;
    case RIL_WORD:
      AppendUTF8WordText(&text);
      break;
    case RIL_SYMBOL:
    {
      bool reading_direction_is_ltr =
          current_paragraph_is_ltr_ ^ in_minor_direction_;
      // Prefix a direction mark when this symbol opens a minor run.
      if (at_beginning_of_minor_run_) {
        text += reading_direction_is_ltr ? kLRM : kRLM;
      }
      text = it_->word()->BestUTF8(blob_index_, !reading_direction_is_ltr);
      if (IsAtFinalSymbolOfWord()) AppendSuffixMarks(&text);
    }
      break;
  }
  // Copy out to a caller-owned NUL-terminated buffer.
  int length = text.length() + 1;
  char* result = new char[length];
  strncpy(result, text.string(), length);
  return result;
}
|
||||
|
||||
// Appends the current word's text to *text in logical reading order,
// prefixed with a direction mark if the word opens a minor-direction run
// and suffixed with any required BiDi marks.
void ResultIterator::AppendUTF8WordText(STRING *text) const {
  if (!it_->word()) return;
  ASSERT_HOST(it_->word()->best_choice != NULL);
  bool reading_direction_is_ltr =
      current_paragraph_is_ltr_ ^ in_minor_direction_;
  if (at_beginning_of_minor_run_) {
    *text += reading_direction_is_ltr ? kLRM : kRLM;
  }

  // Emit the blobs in reading order, mirroring glyphs for RTL contexts.
  GenericVector<int> blob_order;
  CalculateBlobOrder(&blob_order);
  for (int i = 0; i < blob_order.size(); i++) {
    *text += it_->word()->BestUTF8(blob_order[i], !reading_direction_is_ltr);
  }
  AppendSuffixMarks(text);
}
|
||||
|
||||
// Appends the current text line to *text, assuming this iterator is
// positioned at the beginning of the text line.  Advances the iterator to
// the first position past the line.  The line is terminated with
// line_separator_; a paragraph end adds paragraph_separator_.
void ResultIterator::IterateAndAppendUTF8TextlineText(STRING *text) {
  if (Empty(RIL_WORD)) {
    Next(RIL_WORD);
    return;
  }
  if (BidiDebug(1)) {
    // Dump the per-word directions and the computed logical order.
    GenericVectorEqEq<int> textline_order;
    GenericVector<StrongScriptDirection> dirs;
    CalculateTextlineOrder(current_paragraph_is_ltr_,
                           *this, &dirs, &textline_order);
    tprintf("Strong Script dirs     [%p/P=%s]: ", it_->row(),
            current_paragraph_is_ltr_ ? "ltr" : "rtl");
    PrintScriptDirs(dirs);
    tprintf("Logical textline order [%p/P=%s]: ", it_->row(),
            current_paragraph_is_ltr_ ? "ltr" : "rtl");
    for (int i = 0; i < textline_order.size(); i++) {
      tprintf("%d ", textline_order[i]);
    }
    tprintf("\n");
  }

  int words_appended = 0;
  do {
    // With preserve_interword_spaces_, replicate the recognizer's space
    // count; otherwise emit a single space between words.
    int numSpaces = preserve_interword_spaces_ ? it_->word()->word->space()
                                               : (words_appended > 0);
    for (int i = 0; i < numSpaces; ++i) {
      *text += " ";
    }
    AppendUTF8WordText(text);
    words_appended++;
  } while (Next(RIL_WORD) && !IsAtBeginningOf(RIL_TEXTLINE));
  if (BidiDebug(1)) {
    tprintf("%d words printed\n", words_appended);
  }
  *text += line_separator_;
  // If we just finished a paragraph, add an extra newline.
  if (it_->block() == NULL || IsAtBeginningOf(RIL_PARA))
    *text += paragraph_separator_;
}
|
||||
|
||||
// Appends the full text of the current paragraph, line by line in reading
// order, to *text.  Works on a copy so this iterator is not moved.
void ResultIterator::AppendUTF8ParagraphText(STRING *text) const {
  ResultIterator it(*this);
  it.RestartParagraph();
  it.MoveToLogicalStartOfTextline();
  if (it.Empty(RIL_WORD)) return;
  do {
    it.IterateAndAppendUTF8TextlineText(text);
  } while (it.it_->block() != NULL && !it.IsAtBeginningOf(RIL_PARA));
}
|
||||
|
||||
bool ResultIterator::BidiDebug(int min_level) const {
|
||||
int debug_level = 1;
|
||||
IntParam *p = ParamUtils::FindParam<IntParam>(
|
||||
"bidi_debug", GlobalParams()->int_params,
|
||||
tesseract_->params()->int_params);
|
||||
if (p != NULL) debug_level = (inT32)(*p);
|
||||
return debug_level >= min_level;
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
|
@ -0,0 +1,244 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: resultiterator.h
|
||||
// Description: Iterator for tesseract results that is capable of
|
||||
// iterating in proper reading order over Bi Directional
|
||||
// (e.g. mixed Hebrew and English) text.
|
||||
// Author: David Eger
|
||||
// Created: Fri May 27 13:58:06 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H__
|
||||
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H__
|
||||
|
||||
#include "platform.h"
|
||||
#include "ltrresultiterator.h"
|
||||
|
||||
template <typename T> class GenericVector;
|
||||
template <typename T> class GenericVectorEqEq;
|
||||
class BLOB_CHOICE_IT;
|
||||
class WERD_RES;
|
||||
class STRING;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Tesseract;
|
||||
|
||||
/**
 * Iterator over Tesseract results that visits words and symbols in proper
 * logical reading order for bidirectional (e.g. mixed Hebrew and English)
 * text, inserting Unicode BiDi marks into emitted text where needed.
 */
class TESS_API ResultIterator : public LTRResultIterator {
 public:
  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);

  /**
   * ResultIterator is copy constructible!
   * The default copy constructor works just fine for us.
   */
  virtual ~ResultIterator() {}

  // ============= Moving around within the page ============.
  /**
   * Moves the iterator to point to the start of the page to begin
   * an iteration.
   */
  virtual void Begin();

  /**
   * Moves to the start of the next object at the given level in the
   * page hierarchy in the appropriate reading order and returns false if
   * the end of the page was reached.
   * NOTE that RIL_SYMBOL will skip non-text blocks, but all other
   * PageIteratorLevel level values will visit each non-text block once.
   * Think of non text blocks as containing a single para, with a single line,
   * with a single imaginary word.
   * Calls to Next with different levels may be freely intermixed.
   * This function iterates words in right-to-left scripts correctly, if
   * the appropriate language has been loaded into Tesseract.
   */
  virtual bool Next(PageIteratorLevel level);

  /**
   * IsAtBeginningOf() returns whether we're at the logical beginning of the
   * given level.  (as opposed to ResultIterator's left-to-right top-to-bottom
   * order).  Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
   * For a full description, see pageiterator.h
   */
  virtual bool IsAtBeginningOf(PageIteratorLevel level) const;

  /**
   * Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
   * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
   * point at the last word in a paragraph.  See PageIterator for full comment.
   */
  virtual bool IsAtFinalElement(PageIteratorLevel level,
                                PageIteratorLevel element) const;

  // ============= Accessing data ==============.

  /**
   * Returns the null terminated UTF-8 encoded text string for the current
   * object at the given level. Use delete [] to free after use.
   */
  virtual char* GetUTF8Text(PageIteratorLevel level) const;

  /**
   * Return whether the current paragraph's dominant reading direction
   * is left-to-right (as opposed to right-to-left).
   */
  bool ParagraphIsLtr() const;

  // ============= Exposed only for testing =============.

  /**
   * Yields the reading order as a sequence of indices and (optional)
   * meta-marks for a set of words (given left-to-right).
   * The meta marks are passed as negative values:
   *   kMinorRunStart  Start of minor direction text.
   *   kMinorRunEnd    End of minor direction text.
   *   kComplexWord    The next indexed word contains both left-to-right and
   *                    right-to-left characters and was treated as neutral.
   *
   * For example, suppose we have five words in a text line,
   * indexed [0,1,2,3,4] from the leftmost side of the text line.
   * The following are all believable reading_orders:
   *
   * Left-to-Right (in ltr paragraph):
   *     { 0, 1, 2, 3, 4 }
   * Left-to-Right (in rtl paragraph):
   *     { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
   * Right-to-Left (in rtl paragraph):
   *     { 4, 3, 2, 1, 0 }
   * Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
   *     { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
   */
  static void CalculateTextlineOrder(
      bool paragraph_is_ltr,
      const GenericVector<StrongScriptDirection> &word_dirs,
      GenericVectorEqEq<int> *reading_order);

  static const int kMinorRunStart;
  static const int kMinorRunEnd;
  static const int kComplexWord;

 protected:
  /**
   * We presume the data associated with the given iterator will outlive us.
   * NB: This is private because it does something that is non-obvious:
   *   it resets to the beginning of the paragraph instead of staying wherever
   *   resit might have pointed.
   */
  TESS_LOCAL explicit ResultIterator(const LTRResultIterator &resit);

 private:
  /**
   * Calculates the current paragraph's dominant writing direction.
   * Typically, members should use current_paragraph_ltr_ instead.
   */
  bool CurrentParagraphIsLtr() const;

  /**
   * Returns word indices as measured from resit->RestartRow() = index 0
   * for the reading order of words within a textline given an iterator
   * into the middle of the text line.
   * In addition to non-negative word indices, the following negative values
   * may be inserted:
   *   kMinorRunStart  Start of minor direction text.
   *   kMinorRunEnd    End of minor direction text.
   *   kComplexWord    The previous word contains both left-to-right and
   *                    right-to-left characters and was treated as neutral.
   */
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              GenericVectorEqEq<int> *indices) const;
  /** Same as above, but the caller's ssd gets filled in if ssd != NULL. */
  void CalculateTextlineOrder(bool paragraph_is_ltr,
                              const LTRResultIterator &resit,
                              GenericVector<StrongScriptDirection> *ssd,
                              GenericVectorEqEq<int> *indices) const;

  /**
   * What is the index of the current word in a strict left-to-right reading
   * of the row?
   */
  int LTRWordIndex() const;

  /**
   * Given an iterator pointing at a word, returns the logical reading order
   * of blob indices for the word.
   */
  void CalculateBlobOrder(GenericVector<int> *blob_indices) const;

  /** Precondition: current_paragraph_is_ltr_ is set. */
  void MoveToLogicalStartOfTextline();

  /**
   * Precondition: current_paragraph_is_ltr_ and in_minor_direction_
   * are set.
   */
  void MoveToLogicalStartOfWord();

  /** Are we pointing at the final (reading order) symbol of the word? */
  bool IsAtFinalSymbolOfWord() const;

  /** Are we pointing at the first (reading order) symbol of the word? */
  bool IsAtFirstSymbolOfWord() const;

  /**
   * Append any extra marks that should be appended to this word when printed.
   * Mostly, these are Unicode BiDi control characters.
   */
  void AppendSuffixMarks(STRING *text) const;

  /** Appends the current word in reading order to the given buffer.*/
  void AppendUTF8WordText(STRING *text) const;

  /**
   * Appends the text of the current text line, *assuming this iterator is
   * positioned at the beginning of the text line*  This function
   * updates the iterator to point to the first position past the text line.
   * Each textline is terminated in a single newline character.
   * If the textline ends a paragraph, it gets a second terminal newline.
   */
  void IterateAndAppendUTF8TextlineText(STRING *text);

  /**
   * Appends the text of the current paragraph in reading order
   * to the given buffer.
   * Each textline is terminated in a single newline character, and the
   * paragraph gets an extra newline at the end.
   */
  void AppendUTF8ParagraphText(STRING *text) const;

  /** Returns whether the bidi_debug flag is set to at least min_level. */
  bool BidiDebug(int min_level) const;

  // Cached dominant direction of the current paragraph.
  bool current_paragraph_is_ltr_;

  /**
   * Is the currently pointed-at character at the beginning of
   * a minor-direction run?
   */
  bool at_beginning_of_minor_run_;

  /** Is the currently pointed-at character in a minor-direction sequence? */
  bool in_minor_direction_;

  /**
   * Should detected inter-word spaces be preserved, or "compressed" to a single
   * space character (default behavior).
   */
  bool preserve_interword_spaces_;
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H__
|
|
@ -0,0 +1,619 @@
|
|||
/******************************************************************
|
||||
* File: superscript.cpp
|
||||
* Description: Correction pass to fix superscripts and subscripts.
|
||||
* Author: David Eger
|
||||
* Created: Mon Mar 12 14:05:00 PDT 2012
|
||||
*
|
||||
* (C) Copyright 2012, Google, Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "normalis.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
// Converts a count of leading recognized unichars into the equivalent
// count of chopped blobs by summing the per-character blob counts held
// in word->best_state for the first num_unichars characters.
static int LeadingUnicharsToChopped(WERD_RES *word, int num_unichars) {
  int total = 0;
  for (int idx = num_unichars - 1; idx >= 0; idx--) {
    total += word->best_state[idx];
  }
  return total;
}
|
||||
|
||||
// Converts a count of trailing recognized unichars into the equivalent
// count of chopped blobs by summing the per-character blob counts held
// in word->best_state for the last num_unichars characters.
static int TrailingUnicharsToChopped(WERD_RES *word, int num_unichars) {
  const int last = word->best_state.size() - 1;
  int total = 0;
  for (int k = 0; k < num_unichars; ++k) {
    total += word->best_state[last - k];
  }
  return total;
}
|
||||
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**
|
||||
* Given a recognized blob, see if a contiguous collection of sub-pieces
|
||||
* (chopped blobs) starting at its left might qualify as being a subscript
|
||||
* or superscript letter based only on y position. Also do this for the
|
||||
* right side.
|
||||
*/
|
||||
// Scans the chopped sub-pieces of the rebuilt blob at rebuilt_blob_index and
// reports, for each side, how many contiguous pieces lie entirely above
// super_y_bottom (superscript candidates) or below sub_y_top (subscript
// candidates), along with which of the two positions they occupy.
// Any of the four output pointers may be NULL if the caller doesn't care.
void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index,
                    int super_y_bottom, int sub_y_top,
                    ScriptPos *leading_pos, int *num_leading_outliers,
                    ScriptPos *trailing_pos, int *num_trailing_outliers) {
  // Redirect any NULL outputs to local scratch so the loop body can write
  // unconditionally.
  ScriptPos sp_unused1, sp_unused2;
  int unused1, unused2;
  if (!leading_pos) leading_pos = &sp_unused1;
  if (!num_leading_outliers) num_leading_outliers = &unused1;
  if (!trailing_pos) trailing_pos = &sp_unused2;
  if (!num_trailing_outliers) num_trailing_outliers = &unused2;

  *num_leading_outliers = *num_trailing_outliers = 0;
  *leading_pos = *trailing_pos = SP_NORMAL;

  // best_state[i] gives the number of chopped blobs that make up rebuilt
  // blob i, so the pieces for this blob start at the sum of the earlier ones.
  int chopped_start = LeadingUnicharsToChopped(word, rebuilt_blob_index);
  int num_chopped_pieces = word->best_state[rebuilt_blob_index];
  ScriptPos last_pos = SP_NORMAL;
  // Length of the current run of same-position outlier pieces ending at i.
  int trailing_outliers = 0;
  for (int i = 0; i < num_chopped_pieces; i++) {
    TBOX box = word->chopped_word->blobs[chopped_start + i]->bounding_box();
    // Classify this piece purely by its vertical extent.
    ScriptPos pos = SP_NORMAL;
    if (box.bottom() >= super_y_bottom) {
      pos = SP_SUPERSCRIPT;
    }
    else if (box.top() <= sub_y_top) {
      pos = SP_SUBSCRIPT;
    }
    if (pos == SP_NORMAL) {
      // If the outlier run covered every piece so far, it was a leading run;
      // record it before resetting.
      if (trailing_outliers == i) {
        *num_leading_outliers = trailing_outliers;
        *leading_pos = last_pos;
      }
      trailing_outliers = 0;
    }
    else {
      // Extend the run only if the position matches; a super->sub switch
      // starts a fresh run of length 1.
      if (pos == last_pos) {
        trailing_outliers++;
      }
      else {
        trailing_outliers = 1;
      }
    }
    last_pos = pos;
  }
  // Whatever run survived to the end of the loop is the trailing run.
  *num_trailing_outliers = trailing_outliers;
  *trailing_pos = last_pos;
}
|
||||
|
||||
/**
|
||||
* Attempt to split off any high (or low) bits at the ends of the word with poor
|
||||
* certainty and recognize them separately. If the certainty gets much better
|
||||
* and other sanity checks pass, acccept.
|
||||
*
|
||||
* This superscript fix is meant to be called in the second pass of recognition
|
||||
* when we have tried once and already have a preliminary answer for word.
|
||||
*
|
||||
* @return Whether we modified the given word.
|
||||
*/
|
||||
/**
 * Attempt to split off any high (or low) bits at the ends of the word with
 * poor certainty and recognize them separately as super/subscripts. If the
 * certainty gets much better and other sanity checks pass, accept.
 *
 * Meant to be called in the second pass of recognition when we already have
 * a preliminary answer for the word.
 *
 * @param word  the word to inspect and possibly rewrite in place.
 * @return Whether we modified the given word.
 */
bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) {
  if (word->tess_failed || word->word->flag(W_REP_CHAR) ||
      !word->best_choice) {
    return false;
  }
  int num_leading, num_trailing;
  ScriptPos sp_leading, sp_trailing;
  float leading_certainty, trailing_certainty;
  float avg_certainty, unlikely_threshold;

  // Calculate the number of whole suspicious characters at the edges.
  GetSubAndSuperscriptCandidates(
      word, &num_leading, &sp_leading, &leading_certainty,
      &num_trailing, &sp_trailing, &trailing_certainty,
      &avg_certainty, &unlikely_threshold);

  const char *leading_pos = sp_leading == SP_SUBSCRIPT ? "sub" : "super";
  const char *trailing_pos = sp_trailing == SP_SUBSCRIPT ? "sub" : "super";

  int num_blobs = word->best_choice->length();

  // Calculate the remainder (partial characters) at the edges.
  // This accounts for us having classified the best version of
  // a word as [speaker?'] when it was instead [speaker.^{21}]
  // (that is we accidentally thought the 2 was attached to the period).
  int num_remainder_leading = 0, num_remainder_trailing = 0;
  if (num_leading + num_trailing < num_blobs && unlikely_threshold < 0.0) {
    int super_y_bottom =
        kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom;
    int sub_y_top =
        kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top;
    int last_word_char = num_blobs - 1 - num_trailing;
    float last_char_certainty = word->best_choice->certainty(last_word_char);
    if (word->best_choice->unichar_id(last_word_char) != 0 &&
        last_char_certainty <= unlikely_threshold) {
      ScriptPos rpos;
      YOutlierPieces(word, last_word_char, super_y_bottom, sub_y_top,
                     NULL, NULL, &rpos, &num_remainder_trailing);
      if (num_trailing > 0 && rpos != sp_trailing) num_remainder_trailing = 0;
      if (num_remainder_trailing > 0 &&
          last_char_certainty < trailing_certainty) {
        trailing_certainty = last_char_certainty;
      }
    }
    bool another_blob_available = (num_remainder_trailing == 0) ||
        num_leading + num_trailing + 1 < num_blobs;
    // BUGFIX: certainty() returns a float; storing it in an int truncated
    // fractional (negative) certainties and corrupted the comparison with
    // the float unlikely_threshold below. Match the trailing-side code,
    // which correctly uses float.
    float first_char_certainty = word->best_choice->certainty(num_leading);
    if (another_blob_available &&
        word->best_choice->unichar_id(num_leading) != 0 &&
        first_char_certainty <= unlikely_threshold) {
      ScriptPos lpos;
      YOutlierPieces(word, num_leading, super_y_bottom, sub_y_top,
                     &lpos, &num_remainder_leading, NULL, NULL);
      if (num_leading > 0 && lpos != sp_leading) num_remainder_leading = 0;
      if (num_remainder_leading > 0 &&
          first_char_certainty < leading_certainty) {
        leading_certainty = first_char_certainty;
      }
    }
  }

  // If nothing to do, bail now.
  if (num_leading + num_trailing +
      num_remainder_leading + num_remainder_trailing == 0) {
    return false;
  }

  if (superscript_debug >= 1) {
    tprintf("Candidate for superscript detection: %s (",
            word->best_choice->unichar_string().string());
    if (num_leading || num_remainder_leading) {
      tprintf("%d.%d %s-leading ", num_leading, num_remainder_leading,
              leading_pos);
    }
    if (num_trailing || num_remainder_trailing) {
      tprintf("%d.%d %s-trailing ", num_trailing, num_remainder_trailing,
              trailing_pos);
    }
    tprintf(")\n");
  }
  if (superscript_debug >= 3) {
    word->best_choice->print();
  }
  if (superscript_debug >= 2) {
    tprintf(" Certainties -- Average: %.2f Unlikely thresh: %.2f ",
            avg_certainty, unlikely_threshold);
    if (num_leading)
      tprintf("Orig. leading (min): %.2f ", leading_certainty);
    if (num_trailing)
      tprintf("Orig. trailing (min): %.2f ", trailing_certainty);
    tprintf("\n");
  }

  // We've now calculated the number of rebuilt blobs we want to carve off.
  // However, split_word() works from TBLOBs in chopped_word, so we need to
  // convert to those.
  int num_chopped_leading =
      LeadingUnicharsToChopped(word, num_leading) + num_remainder_leading;
  int num_chopped_trailing =
      TrailingUnicharsToChopped(word, num_trailing) + num_remainder_trailing;

  int retry_leading = 0;
  int retry_trailing = 0;
  bool is_good = false;
  WERD_RES *revised = TrySuperscriptSplits(
      num_chopped_leading, leading_certainty, sp_leading,
      num_chopped_trailing, trailing_certainty, sp_trailing,
      word, &is_good, &retry_leading, &retry_trailing);
  if (is_good) {
    word->ConsumeWordResults(revised);
  }
  else if (retry_leading || retry_trailing) {
    // The first split wasn't believable, but the checker suggested a
    // smaller split to retry with; revised is non-NULL here because
    // TrySuperscriptSplits only returns NULL when both retries are zero.
    int retry_chopped_leading =
        LeadingUnicharsToChopped(revised, retry_leading);
    int retry_chopped_trailing =
        TrailingUnicharsToChopped(revised, retry_trailing);
    WERD_RES *revised2 = TrySuperscriptSplits(
        retry_chopped_leading, leading_certainty, sp_leading,
        retry_chopped_trailing, trailing_certainty, sp_trailing,
        revised, &is_good, &retry_leading, &retry_trailing);
    if (is_good) {
      word->ConsumeWordResults(revised2);
    }
    delete revised2;
  }
  delete revised;
  return is_good;
}
|
||||
|
||||
/**
|
||||
* Determine how many characters (rebuilt blobs) on each end of a given word
|
||||
* might plausibly be superscripts so SubAndSuperscriptFix can try to
|
||||
* re-recognize them. Even if we find no whole blobs at either end,
|
||||
* we will set *unlikely_threshold to a certainty that might be used to
|
||||
* select "bad enough" outlier characters. If *unlikely_threshold is set to 0,
|
||||
* though, there's really no hope.
|
||||
*
|
||||
* @param[in] word The word to examine.
|
||||
* @param[out] num_rebuilt_leading the number of rebuilt blobs at the start
|
||||
* of the word which are all up or down and
|
||||
* seem badly classified.
|
||||
* @param[out] leading_pos "super" or "sub" (for debugging)
|
||||
* @param[out] leading_certainty the worst certainty in the leading blobs.
|
||||
* @param[out] num_rebuilt_trailing the number of rebuilt blobs at the end
|
||||
* of the word which are all up or down and
|
||||
* seem badly classified.
|
||||
* @param[out] trailing_pos "super" or "sub" (for debugging)
|
||||
* @param[out] trailing_certainty the worst certainty in the trailing blobs.
|
||||
* @param[out] avg_certainty the average certainty of "normal" blobs in
|
||||
* the word.
|
||||
* @param[out] unlikely_threshold the threshold (on certainty) we used to
|
||||
* select "bad enough" outlier characters.
|
||||
*/
|
||||
// See the doc comment above: finds how many whole rebuilt blobs at each end
// of the word are y-position outliers with suspiciously low certainty, and
// computes the certainty threshold (*unlikely_threshold) used to judge them.
// All outputs are zeroed first, so a plain return means "no candidates".
void Tesseract::GetSubAndSuperscriptCandidates(const WERD_RES *word,
                                               int *num_rebuilt_leading,
                                               ScriptPos *leading_pos,
                                               float *leading_certainty,
                                               int *num_rebuilt_trailing,
                                               ScriptPos *trailing_pos,
                                               float *trailing_certainty,
                                               float *avg_certainty,
                                               float *unlikely_threshold) {
  *avg_certainty = *unlikely_threshold = 0.0f;
  *num_rebuilt_leading = *num_rebuilt_trailing = 0;
  *leading_certainty = *trailing_certainty = 0.0f;

  // Vertical cutoffs in baseline-normalized coordinates: bottoms above
  // super_y_bottom look like superscripts, tops below sub_y_top like
  // subscripts.
  int super_y_bottom =
      kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom;
  int sub_y_top =
      kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top;

  // Step one: Get an average certainty for "normally placed" characters.

  // Counts here are of blobs in the rebuild_word / unichars in best_choice.
  *leading_pos = *trailing_pos = SP_NORMAL;
  int leading_outliers = 0;
  int trailing_outliers = 0;   // length of the outlier run ending at blob b
  int num_normal = 0;
  float normal_certainty_total = 0.0f;
  float worst_normal_certainty = 0.0f;
  ScriptPos last_pos = SP_NORMAL;
  int num_blobs = word->rebuild_word->NumBlobs();
  for (int b = 0; b < num_blobs; ++b) {
    TBOX box = word->rebuild_word->blobs[b]->bounding_box();
    ScriptPos pos = SP_NORMAL;
    if (box.bottom() >= super_y_bottom) {
      pos = SP_SUPERSCRIPT;
    }
    else if (box.top() <= sub_y_top) {
      pos = SP_SUBSCRIPT;
    }
    if (pos == SP_NORMAL) {
      // unichar_id 0 appears to mark an empty/null classification;
      // such blobs are excluded from the certainty average.
      if (word->best_choice->unichar_id(b) != 0) {
        float char_certainty = word->best_choice->certainty(b);
        if (char_certainty < worst_normal_certainty) {
          worst_normal_certainty = char_certainty;
        }
        num_normal++;
        normal_certainty_total += char_certainty;
      }
      // If the run covered every blob so far it was a leading run; save it.
      if (trailing_outliers == b) {
        leading_outliers = trailing_outliers;
        *leading_pos = last_pos;
      }
      trailing_outliers = 0;
    }
    else {
      // Same-position outliers extend the run; a position switch restarts it.
      if (last_pos == pos) {
        trailing_outliers++;
      }
      else {
        trailing_outliers = 1;
      }
    }
    last_pos = pos;
  }
  *trailing_pos = last_pos;
  if (num_normal >= 3) {  // throw out the worst as an outlier.
    num_normal--;
    normal_certainty_total -= worst_normal_certainty;
  }
  if (num_normal > 0) {
    *avg_certainty = normal_certainty_total / num_normal;
    *unlikely_threshold = superscript_worse_certainty * (*avg_certainty);
  }
  // Bail when there is nothing to compare against or no outlier runs at all.
  if (num_normal == 0 ||
      (leading_outliers == 0 && trailing_outliers == 0)) {
    return;
  }

  // Step two: Try to split off bits of the word that are both outliers
  // and have much lower certainty than average
  // Calculate num_leading and leading_certainty.
  // Note: certainties are negative, so "worse than threshold" is <=.
  for (*leading_certainty = 0.0f, *num_rebuilt_leading = 0;
       *num_rebuilt_leading < leading_outliers;
       (*num_rebuilt_leading)++) {
    float char_certainty = word->best_choice->certainty(*num_rebuilt_leading);
    if (char_certainty > *unlikely_threshold) {
      break;
    }
    if (char_certainty < *leading_certainty) {
      *leading_certainty = char_certainty;
    }
  }

  // Calculate num_trailing and trailing_certainty.
  for (*trailing_certainty = 0.0f, *num_rebuilt_trailing = 0;
       *num_rebuilt_trailing < trailing_outliers;
       (*num_rebuilt_trailing)++) {
    int blob_idx = num_blobs - 1 - *num_rebuilt_trailing;
    float char_certainty = word->best_choice->certainty(blob_idx);
    if (char_certainty > *unlikely_threshold) {
      break;
    }
    if (char_certainty < *trailing_certainty) {
      *trailing_certainty = char_certainty;
    }
  }
}
|
||||
|
||||
|
||||
/**
|
||||
* Try splitting off the given number of (chopped) blobs from the front and
|
||||
* back of the given word and recognizing the pieces.
|
||||
*
|
||||
* @param[in] num_chopped_leading how many chopped blobs from the left
|
||||
* end of the word to chop off and try recognizing as a
|
||||
* superscript (or subscript)
|
||||
* @param[in] leading_certainty the (minimum) certainty had by the
|
||||
* characters in the original leading section.
|
||||
* @param[in] leading_pos "super" or "sub" (for debugging)
|
||||
* @param[in] num_chopped_trailing how many chopped blobs from the right
|
||||
* end of the word to chop off and try recognizing as a
|
||||
* superscript (or subscript)
|
||||
* @param[in] trailing_certainty the (minimum) certainty had by the
|
||||
* characters in the original trailing section.
|
||||
* @param[in] trailing_pos "super" or "sub" (for debugging)
|
||||
* @param[in] word the word to try to chop up.
|
||||
* @param[out] is_good do we believe our result?
|
||||
* @param[out] retry_rebuild_leading, retry_rebuild_trailing
|
||||
* If non-zero, and !is_good, then the caller may have luck trying
|
||||
* to split the returned word with this number of (rebuilt) leading
|
||||
* and trailing blobs / unichars.
|
||||
* @return A word which is the result of re-recognizing as asked.
|
||||
*/
|
||||
// See the doc comment above: splits the word into up to three pieces
// (prefix / core / suffix), re-recognizes the prefix and suffix with
// y-position penalties disabled, checks believability, and joins the
// pieces back into one WERD_RES which is returned (caller owns it).
// Returns NULL only when nothing was believable and no retry is possible.
WERD_RES *Tesseract::TrySuperscriptSplits(
    int num_chopped_leading, float leading_certainty, ScriptPos leading_pos,
    int num_chopped_trailing, float trailing_certainty,
    ScriptPos trailing_pos,
    WERD_RES *word,
    bool *is_good,
    int *retry_rebuild_leading, int *retry_rebuild_trailing) {
  int num_chopped = word->chopped_word->NumBlobs();

  *retry_rebuild_leading = *retry_rebuild_trailing = 0;

  // Chop apart the word into up to three pieces.

  BlamerBundle *bb0 = NULL;
  BlamerBundle *bb1 = NULL;
  WERD_RES *prefix = NULL;
  WERD_RES *core = NULL;
  WERD_RES *suffix = NULL;
  if (num_chopped_leading > 0) {
    // split_word() splits its first argument in place and hands back the
    // remainder, so copy the input word first; the original is untouched.
    prefix = new WERD_RES(*word);
    split_word(prefix, num_chopped_leading, &core, &bb0);
  }
  else {
    core = new WERD_RES(*word);
  }

  if (num_chopped_trailing > 0) {
    int split_pt = num_chopped - num_chopped_trailing - num_chopped_leading;
    split_word(core, split_pt, &suffix, &bb1);
  }

  // Recognize the pieces in turn.
  int saved_cp_multiplier = classify_class_pruner_multiplier;
  int saved_im_multiplier = classify_integer_matcher_multiplier;
  if (prefix) {
    // Turn off Tesseract's y-position penalties for the leading superscript.
    classify_class_pruner_multiplier.set_value(0);
    classify_integer_matcher_multiplier.set_value(0);

    // Adjust our expectations about the baseline for this prefix.
    if (superscript_debug >= 3) {
      tprintf(" recognizing first %d chopped blobs\n", num_chopped_leading);
    }
    recog_word_recursive(prefix);
    if (superscript_debug >= 2) {
      tprintf(" The leading bits look like %s %s\n",
              ScriptPosToString(leading_pos),
              prefix->best_choice->unichar_string().string());
    }

    // Restore the normal y-position penalties.
    classify_class_pruner_multiplier.set_value(saved_cp_multiplier);
    classify_integer_matcher_multiplier.set_value(saved_im_multiplier);
  }

  if (superscript_debug >= 3) {
    tprintf(" recognizing middle %d chopped blobs\n",
            num_chopped - num_chopped_leading - num_chopped_trailing);
  }

  if (suffix) {
    // Turn off Tesseract's y-position penalties for the trailing superscript.
    classify_class_pruner_multiplier.set_value(0);
    classify_integer_matcher_multiplier.set_value(0);

    if (superscript_debug >= 3) {
      tprintf(" recognizing last %d chopped blobs\n", num_chopped_trailing);
    }
    recog_word_recursive(suffix);
    if (superscript_debug >= 2) {
      tprintf(" The trailing bits look like %s %s\n",
              ScriptPosToString(trailing_pos),
              suffix->best_choice->unichar_string().string());
    }

    // Restore the normal y-position penalties.
    classify_class_pruner_multiplier.set_value(saved_cp_multiplier);
    classify_integer_matcher_multiplier.set_value(saved_im_multiplier);
  }

  // Evaluate whether we think the results are believably better
  // than what we already had.
  bool good_prefix = !prefix || BelievableSuperscript(
      superscript_debug >= 1, *prefix,
      superscript_bettered_certainty * leading_certainty,
      retry_rebuild_leading, NULL);
  bool good_suffix = !suffix || BelievableSuperscript(
      superscript_debug >= 1, *suffix,
      superscript_bettered_certainty * trailing_certainty,
      NULL, retry_rebuild_trailing);

  *is_good = good_prefix && good_suffix;
  if (!*is_good && !*retry_rebuild_leading && !*retry_rebuild_trailing) {
    // None of it is any good. Quit now.
    delete core;
    delete prefix;
    delete suffix;
    return NULL;
  }
  // The middle piece is only recognized once we know the split is worth it.
  recog_word_recursive(core);

  // Now paste the results together into core.
  // join_words() consumes its second argument, so suffix and (after the
  // swap below) the old core are owned by the joined result.
  if (suffix) {
    suffix->SetAllScriptPositions(trailing_pos);
    join_words(core, suffix, bb1);
  }
  if (prefix) {
    prefix->SetAllScriptPositions(leading_pos);
    join_words(prefix, core, bb0);
    core = prefix;
    prefix = NULL;
  }

  if (superscript_debug >= 1) {
    tprintf("%s superscript fix: %s\n", *is_good ? "ACCEPT" : "REJECT",
            core->best_choice->unichar_string().string());
  }
  return core;
}
|
||||
|
||||
|
||||
/**
|
||||
* Return whether this is believable superscript or subscript text.
|
||||
*
|
||||
* We insist that:
|
||||
* + there are no punctuation marks.
|
||||
* + there are no italics.
|
||||
* + no normal-sized character is smaller than superscript_scaledown_ratio
|
||||
* of what it ought to be, and
|
||||
* + each character is at least as certain as certainty_threshold.
|
||||
*
|
||||
* @param[in] debug If true, spew debug output
|
||||
* @param[in] word The word whose best_choice we're evaluating
|
||||
* @param[in] certainty_threshold If any of the characters have less
|
||||
* certainty than this, reject.
|
||||
* @param[out] left_ok How many left-side characters were ok?
|
||||
* @param[out] right_ok How many right-side characters were ok?
|
||||
* @return Whether the complete best choice is believable as a superscript.
|
||||
*/
|
||||
// See the doc comment above: accepts the word as believable super/subscript
// text only if every character passes all four checks (certainty, height,
// not punctuation, not italic). On rejection, reports how many characters
// passed in an unbroken run from the left (*left_ok) and from the right
// (*right_ok) so the caller can retry with a smaller split.
bool Tesseract::BelievableSuperscript(bool debug,
                                      const WERD_RES &word,
                                      float certainty_threshold,
                                      int *left_ok,
                                      int *right_ok) const {
  int initial_ok_run_count = 0;   // length of the run of good chars at start
  int ok_run_count = 0;           // length of the current run of good chars
  float worst_certainty = 0.0f;
  const WERD_CHOICE &wc = *word.best_choice;

  const UnicityTable<FontInfo>& fontinfo_table = get_fontinfo_table();
  for (int i = 0; i < wc.length(); i++) {
    TBLOB *blob = word.rebuild_word->blobs[i];
    UNICHAR_ID unichar_id = wc.unichar_id(i);
    float char_certainty = wc.certainty(i);
    bool bad_certainty = char_certainty < certainty_threshold;
    bool is_punc = wc.unicharset()->get_ispunctuation(unichar_id);
    bool is_italic = word.fontinfo && word.fontinfo->is_italic();
    BLOB_CHOICE *choice = word.GetBlobChoice(i);
    if (choice && fontinfo_table.size() > 0) {
      // Get better information from the specific choice, if available.
      // Only call it italic when both top font choices agree (or the
      // second is absent).
      int font_id1 = choice->fontinfo_id();
      bool font1_is_italic = font_id1 >= 0
          ? fontinfo_table.get(font_id1).is_italic() : false;
      int font_id2 = choice->fontinfo_id2();
      is_italic = font1_is_italic &&
          (font_id2 < 0 || fontinfo_table.get(font_id2).is_italic());
    }

    float height_fraction = 1.0f;
    float char_height = blob->bounding_box().height();
    float normal_height = char_height;
    if (wc.unicharset()->top_bottom_useful()) {
      // Expected height is the average of the tallest and shortest
      // renderings the unicharset has seen for this character.
      int min_bot, max_bot, min_top, max_top;
      wc.unicharset()->get_top_bottom(unichar_id,
                                      &min_bot, &max_bot,
                                      &min_top, &max_top);
      float hi_height = max_top - max_bot;
      float lo_height = min_top - min_bot;
      normal_height = (hi_height + lo_height) / 2;
      if (normal_height >= kBlnXHeight) {
        // Only ding characters that we have decent information for because
        // they're supposed to be normal sized, not tiny specks or dashes.
        height_fraction = char_height / normal_height;
      }
    }
    bool bad_height = height_fraction < superscript_scaledown_ratio;

    if (debug) {
      if (is_italic) {
        tprintf(" Rejecting: superscript is italic.\n");
      }
      if (is_punc) {
        tprintf(" Rejecting: punctuation present.\n");
      }
      const char *char_str = wc.unicharset()->id_to_unichar(unichar_id);
      if (bad_certainty) {
        tprintf(" Rejecting: don't believe character %s with certainty %.2f "
                "which is less than threshold %.2f\n", char_str,
                char_certainty, certainty_threshold);
      }
      if (bad_height) {
        tprintf(" Rejecting: character %s seems too small @ %.2f versus "
                "expected %.2f\n", char_str, char_height, normal_height);
      }
    }
    if (bad_certainty || bad_height || is_punc || is_italic) {
      // If the good run covered every char so far, it was the initial run;
      // remember its length before resetting.
      if (ok_run_count == i) {
        initial_ok_run_count = ok_run_count;
      }
      ok_run_count = 0;
    }
    else {
      ok_run_count++;
    }
    if (char_certainty < worst_certainty) {
      worst_certainty = char_certainty;
    }
  }
  // All characters passed iff the final run spans the whole word.
  bool all_ok = ok_run_count == wc.length();
  if (all_ok && debug) {
    tprintf(" Accept: worst revised certainty is %.2f\n", worst_certainty);
  }
  if (!all_ok) {
    if (left_ok) *left_ok = initial_ok_run_count;
    if (right_ok) *right_ok = ok_run_count;
  }
  return all_ok;
}
|
||||
|
||||
|
||||
} // namespace tesseract
|
|
@ -0,0 +1,82 @@
|
|||
/**********************************************************************
|
||||
* File: tessbox.cpp (Formerly tessbox.c)
|
||||
* Description: Black boxed Tess for developing a resaljet.
|
||||
* Author: Ray Smith
|
||||
* Created: Thu Apr 23 11:03:36 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable:4244) // Conversion warnings
|
||||
#endif
|
||||
|
||||
#include "mfoutline.h"
|
||||
#include "tessbox.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
#define EXTERN
|
||||
|
||||
/**
|
||||
* @name tess_segment_pass_n
|
||||
*
|
||||
* Segment a word using the pass_n conditions of the tess segmenter.
|
||||
* @param pass_n pass number
|
||||
* @param word word to do
|
||||
*/
|
||||
|
||||
namespace tesseract {
|
||||
void Tesseract::tess_segment_pass_n(int pass_n, WERD_RES *word) {
|
||||
int saved_enable_assoc = 0;
|
||||
int saved_chop_enable = 0;
|
||||
|
||||
if (word->word->flag(W_DONT_CHOP)) {
|
||||
saved_enable_assoc = wordrec_enable_assoc;
|
||||
saved_chop_enable = chop_enable;
|
||||
wordrec_enable_assoc.set_value(0);
|
||||
chop_enable.set_value(0);
|
||||
}
|
||||
if (pass_n == 1)
|
||||
set_pass1();
|
||||
else
|
||||
set_pass2();
|
||||
recog_word(word);
|
||||
if (word->best_choice == NULL)
|
||||
word->SetupFake(*word->uch_set);
|
||||
if (word->word->flag(W_DONT_CHOP)) {
|
||||
wordrec_enable_assoc.set_value(saved_enable_assoc);
|
||||
chop_enable.set_value(saved_chop_enable);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @name tess_acceptable_word
|
||||
*
|
||||
* @return true if the word is regarded as "good enough".
|
||||
* @param word_choice after context
|
||||
* @param raw_choice before context
|
||||
*/
|
||||
// Thin wrapper: delegates entirely to the dictionary's acceptability test.
bool Tesseract::tess_acceptable_word(WERD_RES* word) {
  return getDict().AcceptableResult(word);
}
|
||||
|
||||
|
||||
/**
|
||||
* @name tess_add_doc_word
|
||||
*
|
||||
* Add the given word to the document dictionary
|
||||
*/
|
||||
// Thin wrapper: adds the given word choice to the document dictionary.
void Tesseract::tess_add_doc_word(WERD_CHOICE *word_choice) {
  getDict().add_document_word(*word_choice);
}
|
||||
} // namespace tesseract
|
|
@ -0,0 +1,28 @@
|
|||
/**********************************************************************
|
||||
* File: tessbox.h (Formerly tessbox.h)
|
||||
* Description: Black boxed Tess for developing a resaljet.
|
||||
* Author: Ray Smith
|
||||
* Created: Thu Apr 23 11:03:36 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSBOX_H
|
||||
#define TESSBOX_H
|
||||
|
||||
#include "ratngs.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
// TODO(ocr-team): Delete this along with other empty header files.
|
||||
|
||||
#endif
|
|
@ -0,0 +1,501 @@
|
|||
/**********************************************************************
|
||||
* File: tessedit.cpp (Formerly tessedit.c)
|
||||
* Description: (Previously) Main program for merge of tess and editor.
|
||||
* Now just code to load the language model and various
|
||||
* engine-specific data files.
|
||||
* Author: Ray Smith
|
||||
* Created: Tue Jan 07 15:21:46 GMT 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "stderr.h"
|
||||
#include "basedir.h"
|
||||
#include "tessvars.h"
|
||||
#include "control.h"
|
||||
#include "reject.h"
|
||||
#include "pageres.h"
|
||||
#include "nwmain.h"
|
||||
#include "pgedit.h"
|
||||
#include "tprintf.h"
|
||||
#include "tessedit.h"
|
||||
#include "stopper.h"
|
||||
#include "intmatcher.h"
|
||||
#include "chop.h"
|
||||
#include "efio.h"
|
||||
#include "danerror.h"
|
||||
#include "globals.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "params.h"
|
||||
|
||||
#define VARDIR "configs/" /*variables files */
|
||||
// config under api
|
||||
#define API_CONFIG "configs/api_config"
|
||||
|
||||
ETEXT_DESC *global_monitor = NULL; // progress monitor
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Read a "config" file containing a set of variable, value pairs.
|
||||
// Searches the standard places: tessdata/configs, tessdata/tessconfigs
|
||||
// and also accepts a relative or absolute path name.
|
||||
void Tesseract::read_config_file(const char *filename,
|
||||
SetParamConstraint constraint) {
|
||||
STRING path = datadir;
|
||||
path += "configs/";
|
||||
path += filename;
|
||||
FILE* fp;
|
||||
if ((fp = fopen(path.string(), "rb")) != NULL) {
|
||||
fclose(fp);
|
||||
}
|
||||
else {
|
||||
path = datadir;
|
||||
path += "tessconfigs/";
|
||||
path += filename;
|
||||
if ((fp = fopen(path.string(), "rb")) != NULL) {
|
||||
fclose(fp);
|
||||
}
|
||||
else {
|
||||
path = filename;
|
||||
}
|
||||
}
|
||||
ParamUtils::ReadParamsFile(path.string(), constraint, this->params());
|
||||
}
|
||||
|
||||
// Returns false if a unicharset file for the specified language was not found
|
||||
// or was invalid.
|
||||
// This function initializes TessdataManager. After TessdataManager is
|
||||
// no longer needed, TessdataManager::End() should be called.
|
||||
//
|
||||
// This function sets tessedit_oem_mode to the given OcrEngineMode oem, unless
|
||||
// it is OEM_DEFAULT, in which case the value of the variable will be obtained
|
||||
// from the language-specific config file (stored in [lang].traineddata), from
|
||||
// the config files specified on the command line or left as the default
|
||||
// OEM_TESSERACT_ONLY if none of the configs specify this variable.
|
||||
// Loads the language model and engine-specific data files for |language|
// (defaulting to "eng") from the traineddata opened via TessdataManager.
// Returns false if the traineddata cannot be opened, the unicharset is
// missing or larger than MAX_NUM_CLASSES, or a params-model fails to load.
// NOTE(review): tessdata_path is taken directly from arg0 here; the
// commented-out line shows the usual prefix-based path -- confirm callers
// pass the traineddata path in arg0.
bool Tesseract::init_tesseract_lang_data(
    const char *arg0, const char *textbase, const char *language,
    OcrEngineMode oem, char **configs, int configs_size,
    const GenericVector<STRING> *vars_vec,
    const GenericVector<STRING> *vars_values,
    bool set_only_non_debug_params) {
  // Set the basename, compute the data directory.
  main_setup(arg0, textbase);

  // Set the language data path prefix
  lang = language != NULL ? language : "eng";
  language_data_path_prefix = datadir;
  language_data_path_prefix += lang;
  language_data_path_prefix += ".";

  // Initialize TessdataManager.
  //STRING tessdata_path = language_data_path_prefix + kTrainedDataSuffix;
  STRING tessdata_path = arg0;
  if (!tessdata_manager.Init(tessdata_path.string(),
                             tessdata_manager_debug_level)) {
    return false;
  }

  // If a language specific config file (lang.config) exists, load it in.
  if (tessdata_manager.SeekToStart(TESSDATA_LANG_CONFIG)) {
    ParamUtils::ReadParamsFromFp(
        tessdata_manager.GetDataFilePtr(),
        tessdata_manager.GetEndOffset(TESSDATA_LANG_CONFIG),
        SET_PARAM_CONSTRAINT_NONE, this->params());
    if (tessdata_manager_debug_level) {
      tprintf("Loaded language config file\n");
    }
  }

  SetParamConstraint set_params_constraint = set_only_non_debug_params ?
      SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY : SET_PARAM_CONSTRAINT_NONE;
  // Load tesseract variables from config files. This is done after loading
  // language-specific variables from [lang].traineddata file, so that custom
  // config files can override values in [lang].traineddata file.
  for (int i = 0; i < configs_size; ++i) {
    read_config_file(configs[i], set_params_constraint);
  }

  // Set params specified in vars_vec (done after setting params from config
  // files, so that params in vars_vec can override those from files).
  if (vars_vec != NULL && vars_values != NULL) {
    for (int i = 0; i < vars_vec->size(); ++i) {
      if (!ParamUtils::SetParam((*vars_vec)[i].string(),
                                (*vars_values)[i].string(),
                                set_params_constraint, this->params())) {
        // A bad variable name/value is treated as fatal.
        tprintf("Error setting param %s\n", (*vars_vec)[i].string());
        exit(1);
      }
    }
  }

  // Optionally dump the final parameter set for debugging.
  if (((STRING &)tessedit_write_params_to_file).length() > 0) {
    FILE *params_file = fopen(tessedit_write_params_to_file.string(), "wb");
    if (params_file != NULL) {
      ParamUtils::PrintParams(params_file, this->params());
      fclose(params_file);
      if (tessdata_manager_debug_level > 0) {
        tprintf("Wrote parameters to %s\n",
                tessedit_write_params_to_file.string());
      }
    } else {
      tprintf("Failed to open %s for writing params.\n",
              tessedit_write_params_to_file.string());
    }
  }

  // Determine which ocr engine(s) should be loaded and used for recognition.
  if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem);
  if (tessdata_manager_debug_level) {
    tprintf("Loading Tesseract/Cube with tessedit_ocr_engine_mode %d\n",
            static_cast<int>(tessedit_ocr_engine_mode));
  }

  // If we are only loading the config file (and so not planning on doing any
  // recognition) then there's nothing else do here.
  if (tessedit_init_config_only) {
    if (tessdata_manager_debug_level) {
      tprintf("Returning after loading config file\n");
    }
    return true;
  }

  // Load the unicharset
  if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) ||
      !unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) {
    return false;
  }
  if (unicharset.size() > MAX_NUM_CLASSES) {
    tprintf("Error: Size of unicharset is greater than MAX_NUM_CLASSES\n");
    return false;
  }
  if (tessdata_manager_debug_level) tprintf("Loaded unicharset\n");
  right_to_left_ = unicharset.major_right_to_left();

  // Setup initial unichar ambigs table and read universal ambigs.
  UNICHARSET encoder_unicharset;
  encoder_unicharset.CopyFrom(unicharset);
  unichar_ambigs.InitUnicharAmbigs(unicharset, use_ambigs_for_adaption);
  unichar_ambigs.LoadUniversal(encoder_unicharset, &unicharset);

  if (!tessedit_ambigs_training &&
      tessdata_manager.SeekToStart(TESSDATA_AMBIGS)) {
    TFile ambigs_file;
    ambigs_file.Open(tessdata_manager.GetDataFilePtr(),
                     tessdata_manager.GetEndOffset(TESSDATA_AMBIGS) + 1);
    unichar_ambigs.LoadUnicharAmbigs(
        encoder_unicharset,
        &ambigs_file,
        ambigs_debug_level, use_ambigs_for_adaption, &unicharset);
    if (tessdata_manager_debug_level) tprintf("Loaded ambigs\n");
  }

  // The various OcrEngineMode settings (see publictypes.h) determine which
  // engine-specific data files need to be loaded. Currently everything needs
  // the base tesseract data, which supplies other useful information, but
  // alternative engines, such as cube and LSTM are optional.
#ifndef NO_CUBE_BUILD
  if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
    ASSERT_HOST(init_cube_objects(false, &tessdata_manager));
    if (tessdata_manager_debug_level)
      tprintf("Loaded Cube w/out combiner\n");
  } else if (tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED) {
    ASSERT_HOST(init_cube_objects(true, &tessdata_manager));
    if (tessdata_manager_debug_level)
      tprintf("Loaded Cube with combiner\n");
  }
#endif
  // Init ParamsModel.
  // Load pass1 and pass2 weights (for now these two sets are the same, but in
  // the future separate sets of weights can be generated).
  for (int p = ParamsModel::PTRAIN_PASS1;
      p < ParamsModel::PTRAIN_NUM_PASSES; ++p) {
    language_model_->getParamsModel().SetPass(
        static_cast<ParamsModel::PassEnum>(p));
    if (tessdata_manager.SeekToStart(TESSDATA_PARAMS_MODEL)) {
      if (!language_model_->getParamsModel().LoadFromFp(
          lang.string(), tessdata_manager.GetDataFilePtr(),
          tessdata_manager.GetEndOffset(TESSDATA_PARAMS_MODEL))) {
        return false;
      }
    }
  }
  if (tessdata_manager_debug_level) language_model_->getParamsModel().Print();

  return true;
}
|
||||
|
||||
// Helper returns true if the given string is in the vector of strings.
|
||||
static bool IsStrInList(const STRING& str,
|
||||
const GenericVector<STRING>& str_list) {
|
||||
for (int i = 0; i < str_list.size(); ++i) {
|
||||
if (str_list[i] == str)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Parse a string of the form [~]<lang>[+[~]<lang>]*.
|
||||
// Langs with no prefix get appended to to_load, provided they
|
||||
// are not in there already.
|
||||
// Langs with ~ prefix get appended to not_to_load, provided they are not in
|
||||
// there already.
|
||||
void Tesseract::ParseLanguageString(const char* lang_str,
|
||||
GenericVector<STRING>* to_load,
|
||||
GenericVector<STRING>* not_to_load) {
|
||||
STRING remains(lang_str);
|
||||
while (remains.length() > 0) {
|
||||
// Find the start of the lang code and which vector to add to.
|
||||
const char* start = remains.string();
|
||||
while (*start == '+')
|
||||
++start;
|
||||
GenericVector<STRING>* target = to_load;
|
||||
if (*start == '~') {
|
||||
target = not_to_load;
|
||||
++start;
|
||||
}
|
||||
// Find the index of the end of the lang code in string start.
|
||||
int end = strlen(start);
|
||||
const char* plus = strchr(start, '+');
|
||||
if (plus != NULL && plus - start < end)
|
||||
end = plus - start;
|
||||
STRING lang_code(start);
|
||||
lang_code.truncate_at(end);
|
||||
STRING next(start + end);
|
||||
remains = next;
|
||||
// Check whether lang_code is already in the target vector and add.
|
||||
if (!IsStrInList(lang_code, *target)) {
|
||||
if (tessdata_manager_debug_level)
|
||||
tprintf("Adding language '%s' to list\n", lang_code.string());
|
||||
target->push_back(lang_code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize for potentially a set of languages defined by the language
|
||||
// string and recursively any additional languages required by any language
|
||||
// traineddata file (via tessedit_load_sublangs in its config) that is loaded.
|
||||
// See init_tesseract_internal for args.
|
||||
// Initialize for potentially a set of languages defined by the language
// string and recursively any additional languages required by any language
// traineddata file (via tessedit_load_sublangs in its config) that is loaded.
// The first loadable language becomes the primary (this object); the rest
// are loaded into sub_langs_. Returns 0 on success, -1 if no language at
// all could be loaded. See init_tesseract_internal for args.
int Tesseract::init_tesseract(
    const char *arg0, const char *textbase, const char *language,
    OcrEngineMode oem, char **configs, int configs_size,
    const GenericVector<STRING> *vars_vec,
    const GenericVector<STRING> *vars_values,
    bool set_only_non_debug_params) {
  GenericVector<STRING> langs_to_load;
  GenericVector<STRING> langs_not_to_load;
  ParseLanguageString(language, &langs_to_load, &langs_not_to_load);

  // Drop any previously loaded secondary languages.
  sub_langs_.delete_data_pointers();
  sub_langs_.clear();
  // Find the first loadable lang and load into this.
  // Add any languages that this language requires
  bool loaded_primary = false;
  // Load the rest into sub_langs_.
  // NOTE: langs_to_load can grow during this loop -- each successfully
  // loaded language appends its own required sub-languages.
  for (int lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) {
    if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) {
      const char *lang_str = langs_to_load[lang_index].string();
      // The first successful load initializes *this; later ones get a
      // fresh Tesseract instance.
      Tesseract *tess_to_init;
      if (!loaded_primary) {
        tess_to_init = this;
      } else {
        tess_to_init = new Tesseract;
      }

      int result = tess_to_init->init_tesseract_internal(
          arg0, textbase, lang_str, oem, configs, configs_size,
          vars_vec, vars_values, set_only_non_debug_params);

      if (!loaded_primary) {
        if (result < 0) {
          tprintf("Failed loading language '%s'\n", lang_str);
        } else {
          if (tessdata_manager_debug_level)
            tprintf("Loaded language '%s' as main language\n", lang_str);
          ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
                              &langs_to_load, &langs_not_to_load);
          loaded_primary = true;
        }
      } else {
        if (result < 0) {
          tprintf("Failed loading language '%s'\n", lang_str);
          // Secondary instance was heap-allocated above; reclaim it.
          delete tess_to_init;
        } else {
          if (tessdata_manager_debug_level)
            tprintf("Loaded language '%s' as secondary language\n", lang_str);
          sub_langs_.push_back(tess_to_init);
          // Add any languages that this language requires
          ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
                              &langs_to_load, &langs_not_to_load);
        }
      }
    }
  }
  if (!loaded_primary) {
    tprintf("Tesseract couldn't load any languages!\n");
    return -1;  // Couldn't load any language!
  }
  if (!sub_langs_.empty()) {
    // In multilingual mode word ratings have to be directly comparable,
    // so use the same language model weights for all languages:
    // use the primary language's params model if
    // tessedit_use_primary_params_model is set,
    // otherwise use default language model weights.
    if (tessedit_use_primary_params_model) {
      for (int s = 0; s < sub_langs_.size(); ++s) {
        sub_langs_[s]->language_model_->getParamsModel().Copy(
            this->language_model_->getParamsModel());
      }
      tprintf("Using params model of the primary language\n");
      if (tessdata_manager_debug_level) {
        this->language_model_->getParamsModel().Print();
      }
    } else {
      this->language_model_->getParamsModel().Clear();
      for (int s = 0; s < sub_langs_.size(); ++s) {
        sub_langs_[s]->language_model_->getParamsModel().Clear();
      }
      if (tessdata_manager_debug_level)
        tprintf("Using default language params\n");
    }
  }

  SetupUniversalFontIds();
  return 0;
}
|
||||
|
||||
// Common initialization for a single language.
|
||||
// arg0 is the datapath for the tessdata directory, which could be the
|
||||
// path of the tessdata directory with no trailing /, or (if tessdata
|
||||
// lives in the same directory as the executable, the path of the executable,
|
||||
// hence the name arg0.
|
||||
// textbase is an optional output file basename (used only for training)
|
||||
// language is the language code to load.
|
||||
// oem controls which engine(s) will operate on the image
|
||||
// configs (argv) is an array of config filenames to load variables from.
|
||||
// May be NULL.
|
||||
// configs_size (argc) is the number of elements in configs.
|
||||
// vars_vec is an optional vector of variables to set.
|
||||
// vars_values is an optional corresponding vector of values for the variables
|
||||
// in vars_vec.
|
||||
// If set_only_init_params is true, then only the initialization variables
|
||||
// will be set.
|
||||
int Tesseract::init_tesseract_internal(
|
||||
const char *arg0, const char *textbase, const char *language,
|
||||
OcrEngineMode oem, char **configs, int configs_size,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values,
|
||||
bool set_only_non_debug_params) {
|
||||
if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs,
|
||||
configs_size, vars_vec, vars_values,
|
||||
set_only_non_debug_params)) {
|
||||
return -1;
|
||||
}
|
||||
if (tessedit_init_config_only) {
|
||||
tessdata_manager.End();
|
||||
return 0;
|
||||
}
|
||||
// If only Cube will be used, skip loading Tesseract classifier's
|
||||
// pre-trained templates.
|
||||
bool init_tesseract_classifier =
|
||||
(tessedit_ocr_engine_mode == OEM_TESSERACT_ONLY ||
|
||||
tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED);
|
||||
// If only Cube will be used and if it has its own Unicharset,
|
||||
// skip initializing permuter and loading Tesseract Dawgs.
|
||||
bool init_dict =
|
||||
!(tessedit_ocr_engine_mode == OEM_CUBE_ONLY &&
|
||||
tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET));
|
||||
program_editup(textbase, init_tesseract_classifier, init_dict);
|
||||
tessdata_manager.End();
|
||||
return 0; //Normal exit
|
||||
}
|
||||
|
||||
// Helper builds the all_fonts table by adding new fonts from new_fonts.
|
||||
static void CollectFonts(const UnicityTable<FontInfo>& new_fonts,
|
||||
UnicityTable<FontInfo>* all_fonts) {
|
||||
for (int i = 0; i < new_fonts.size(); ++i) {
|
||||
// UnicityTable uniques as we go.
|
||||
all_fonts->push_back(new_fonts.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
// Helper assigns an id to lang_fonts using the index in all_fonts table.
|
||||
static void AssignIds(const UnicityTable<FontInfo>& all_fonts,
|
||||
UnicityTable<FontInfo>* lang_fonts) {
|
||||
for (int i = 0; i < lang_fonts->size(); ++i) {
|
||||
int index = all_fonts.get_id(lang_fonts->get(i));
|
||||
lang_fonts->get_mutable(i)->universal_id = index;
|
||||
}
|
||||
}
|
||||
|
||||
// Set the universal_id member of each font to be unique among all
|
||||
// instances of the same font loaded.
|
||||
// Set the universal_id member of each font to be unique among all
// instances of the same font loaded (primary language plus sub_langs_).
void Tesseract::SetupUniversalFontIds() {
  // Bitwise-copying FontInfo into all_fonts is fine here: the table is
  // temporary and no delete callback is installed.
  UnicityTable<FontInfo> all_fonts;
  all_fonts.set_compare_callback(NewPermanentTessCallback(CompareFontInfo));

  // Phase 1: union all font tables into the universal ID table.
  CollectFonts(get_fontinfo_table(), &all_fonts);
  for (int sub = 0; sub < sub_langs_.size(); ++sub)
    CollectFonts(sub_langs_[sub]->get_fontinfo_table(), &all_fonts);

  // Phase 2: write the universal ids back into each per-language table.
  AssignIds(all_fonts, &get_fontinfo_table());
  for (int sub = 0; sub < sub_langs_.size(); ++sub)
    AssignIds(all_fonts, &sub_langs_[sub]->get_fontinfo_table());

  font_table_size_ = all_fonts.size();
}
|
||||
|
||||
// init the LM component
|
||||
int Tesseract::init_tesseract_lm(const char *arg0,
|
||||
const char *textbase,
|
||||
const char *language) {
|
||||
if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY,
|
||||
NULL, 0, NULL, NULL, false))
|
||||
return -1;
|
||||
getDict().SetupForLoad(Dict::GlobalDawgCache());
|
||||
getDict().Load(tessdata_manager.GetDataFileName().string(), lang);
|
||||
getDict().FinishLoad();
|
||||
tessdata_manager.End();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Tear down recognition state; delegates entirely to end_recog().
void Tesseract::end_tesseract() {
  end_recog();
}
|
||||
|
||||
/* Define command type identifiers */
|
||||
|
||||
// Command type identifiers (values are sequential from 0).
// NOTE(review): presumably consumed by the pgedit menu/command dispatch --
// confirm against pgedit.cpp before relying on the ordering.
enum CMD_EVENTS
{
  ACTION_1_CMD_EVENT,
  RECOG_WERDS,
  RECOG_PSEUDO,
  ACTION_2_CMD_EVENT
};
|
||||
} // namespace tesseract
|
|
@ -0,0 +1,29 @@
|
|||
/**********************************************************************
|
||||
* File: tessedit.h (Formerly tessedit.h)
|
||||
* Description: Main program for merge of tess and editor.
|
||||
* Author: Ray Smith
|
||||
* Created: Tue Jan 07 15:21:46 GMT 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSEDIT_H
#define TESSEDIT_H

#include "blobs.h"
#include "pgedit.h"

// Progress monitor; defined (initialized to NULL) in tessedit.cpp.
extern ETEXT_DESC *global_monitor;

#endif
|
|
@ -0,0 +1,305 @@
|
|||
/**********************************************************************
|
||||
* File: tesseract_cube_combiner.h
|
||||
* Description: Declaration of the Tesseract & Cube results combiner Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The TesseractCubeCombiner class provides the functionality of combining
|
||||
// the recognition results of Tesseract and Cube at the word level
|
||||
|
||||
#include <algorithm>
|
||||
#include <wctype.h>
|
||||
|
||||
#include "tesseract_cube_combiner.h"
|
||||
|
||||
#include "cube_object.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "cube_utils.h"
|
||||
#include "neural_net.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "word_altlist.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Stores the recognition context; the combiner net stays unloaded until
// LoadCombinerNet() is called.
TesseractCubeCombiner::TesseractCubeCombiner(CubeRecoContext *cube_cntxt)
    : cube_cntxt_(cube_cntxt),
      combiner_net_(NULL) {
}
|
||||
|
||||
// Releases the combiner net if one was loaded. The context pointer is
// not owned and is left untouched.
TesseractCubeCombiner::~TesseractCubeCombiner() {
  // delete on a null pointer is a no-op, so no guard is needed.
  delete combiner_net_;
  combiner_net_ = NULL;
}
|
||||
|
||||
bool TesseractCubeCombiner::LoadCombinerNet() {
|
||||
ASSERT_HOST(cube_cntxt_);
|
||||
// Compute the path of the combiner net
|
||||
string data_path;
|
||||
cube_cntxt_->GetDataFilePath(&data_path);
|
||||
string net_file_name = data_path + cube_cntxt_->Lang() +
|
||||
".tesseract_cube.nn";
|
||||
|
||||
// Return false if file does not exist
|
||||
FILE *fp = fopen(net_file_name.c_str(), "rb");
|
||||
if (fp == NULL)
|
||||
return false;
|
||||
else
|
||||
fclose(fp);
|
||||
|
||||
// Load and validate net
|
||||
combiner_net_ = NeuralNet::FromFile(net_file_name);
|
||||
if (combiner_net_ == NULL) {
|
||||
tprintf("Could not read combiner net file %s", net_file_name.c_str());
|
||||
return false;
|
||||
}
|
||||
else if (combiner_net_->out_cnt() != 2) {
|
||||
tprintf("Invalid combiner net file %s! Output count != 2\n",
|
||||
net_file_name.c_str());
|
||||
delete combiner_net_;
|
||||
combiner_net_ = NULL;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Normalize a UTF-8 string. Converts the UTF-8 string to UTF32 and optionally
|
||||
// strips punc and/or normalizes case and then converts back
|
||||
// Normalize a UTF-8 string: converts to UTF-32, optionally strips
// punctuation and/or lower-cases alphabetic characters, then converts
// back to UTF-8.
string TesseractCubeCombiner::NormalizeString(const string &str,
                                              bool remove_punc,
                                              bool norm_case) {
  // Work in UTF-32 so each element is a whole character.
  string_32 utf32_in;
  CubeUtils::UTF8ToUTF32(str.c_str(), &utf32_in);
  string_32 utf32_out;
  for (int pos = 0; pos < utf32_in.length(); pos++) {
    char_32 ch = utf32_in[pos];
    // Drop punctuation characters when stripping is requested.
    if (remove_punc && iswpunct(ch) != 0)
      continue;
    // Lower-case alphabetic characters when case folding is requested.
    if (norm_case && iswalpha(ch))
      ch = towlower(ch);
    utf32_out.push_back(ch);
  }
  string result;
  CubeUtils::UTF32ToUTF8(utf32_out.c_str(), &result);
  return result;
}
|
||||
|
||||
// Compares 2 strings optionally ignoring punctuation
|
||||
int TesseractCubeCombiner::CompareStrings(const string &str1,
|
||||
const string &str2,
|
||||
bool ignore_punc,
|
||||
bool ignore_case) {
|
||||
if (!ignore_punc && !ignore_case) {
|
||||
return str1.compare(str2);
|
||||
}
|
||||
string norm_str1 = NormalizeString(str1, ignore_punc, ignore_case);
|
||||
string norm_str2 = NormalizeString(str2, ignore_punc, ignore_case);
|
||||
return norm_str1.compare(norm_str2);
|
||||
}
|
||||
|
||||
// Check if a string is a valid Tess dict word or not
|
||||
bool TesseractCubeCombiner::ValidWord(const string &str) {
|
||||
return (cube_cntxt_->TesseractObject()->getDict().valid_word(str.c_str())
|
||||
> 0);
|
||||
}
|
||||
|
||||
// Public method for computing the combiner features. The agreement
|
||||
// output parameter will be true if both answers are identical,
|
||||
// and false otherwise.
|
||||
bool TesseractCubeCombiner::ComputeCombinerFeatures(const string &tess_str,
|
||||
int tess_confidence,
|
||||
CubeObject *cube_obj,
|
||||
WordAltList *cube_alt_list,
|
||||
vector<double> *features,
|
||||
bool *agreement) {
|
||||
features->clear();
|
||||
*agreement = false;
|
||||
if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0)
|
||||
return false;
|
||||
|
||||
// Get Cube's best string; return false if empty
|
||||
char_32 *cube_best_str32 = cube_alt_list->Alt(0);
|
||||
if (cube_best_str32 == NULL || CubeUtils::StrLen(cube_best_str32) < 1)
|
||||
return false;
|
||||
string cube_best_str;
|
||||
int cube_best_cost = cube_alt_list->AltCost(0);
|
||||
int cube_best_bigram_cost = 0;
|
||||
bool cube_best_bigram_cost_valid = true;
|
||||
if (cube_cntxt_->Bigrams())
|
||||
cube_best_bigram_cost = cube_cntxt_->Bigrams()->
|
||||
Cost(cube_best_str32, cube_cntxt_->CharacterSet());
|
||||
else
|
||||
cube_best_bigram_cost_valid = false;
|
||||
CubeUtils::UTF32ToUTF8(cube_best_str32, &cube_best_str);
|
||||
|
||||
// Get Tesseract's UTF32 string
|
||||
string_32 tess_str32;
|
||||
CubeUtils::UTF8ToUTF32(tess_str.c_str(), &tess_str32);
|
||||
|
||||
// Compute agreement flag
|
||||
*agreement = (tess_str.compare(cube_best_str) == 0);
|
||||
|
||||
// Get Cube's second best string; if empty, return false
|
||||
char_32 *cube_next_best_str32;
|
||||
string cube_next_best_str;
|
||||
int cube_next_best_cost = WORST_COST;
|
||||
if (cube_alt_list->AltCount() > 1) {
|
||||
cube_next_best_str32 = cube_alt_list->Alt(1);
|
||||
if (cube_next_best_str32 == NULL ||
|
||||
CubeUtils::StrLen(cube_next_best_str32) == 0) {
|
||||
return false;
|
||||
}
|
||||
cube_next_best_cost = cube_alt_list->AltCost(1);
|
||||
CubeUtils::UTF32ToUTF8(cube_next_best_str32, &cube_next_best_str);
|
||||
}
|
||||
// Rank of Tesseract's top result in Cube's alternate list
|
||||
int tess_rank = 0;
|
||||
for (tess_rank = 0; tess_rank < cube_alt_list->AltCount(); tess_rank++) {
|
||||
string alt_str;
|
||||
CubeUtils::UTF32ToUTF8(cube_alt_list->Alt(tess_rank), &alt_str);
|
||||
if (alt_str == tess_str)
|
||||
break;
|
||||
}
|
||||
|
||||
// Cube's cost for tesseract's result. Note that this modifies the
|
||||
// state of cube_obj, including its alternate list by calling RecognizeWord()
|
||||
int tess_cost = cube_obj->WordCost(tess_str.c_str());
|
||||
// Cube's bigram cost of Tesseract's string
|
||||
int tess_bigram_cost = 0;
|
||||
int tess_bigram_cost_valid = true;
|
||||
if (cube_cntxt_->Bigrams())
|
||||
tess_bigram_cost = cube_cntxt_->Bigrams()->
|
||||
Cost(tess_str32.c_str(), cube_cntxt_->CharacterSet());
|
||||
else
|
||||
tess_bigram_cost_valid = false;
|
||||
|
||||
// Tesseract confidence
|
||||
features->push_back(tess_confidence);
|
||||
// Cube cost of Tesseract string
|
||||
features->push_back(tess_cost);
|
||||
// Cube Rank of Tesseract string
|
||||
features->push_back(tess_rank);
|
||||
// length of Tesseract OCR string
|
||||
features->push_back(tess_str.length());
|
||||
// Tesseract OCR string in dictionary
|
||||
features->push_back(ValidWord(tess_str));
|
||||
if (tess_bigram_cost_valid) {
|
||||
// bigram cost of Tesseract string
|
||||
features->push_back(tess_bigram_cost);
|
||||
}
|
||||
// Cube tess_cost of Cube best string
|
||||
features->push_back(cube_best_cost);
|
||||
// Cube tess_cost of Cube next best string
|
||||
features->push_back(cube_next_best_cost);
|
||||
// length of Cube string
|
||||
features->push_back(cube_best_str.length());
|
||||
// Cube string in dictionary
|
||||
features->push_back(ValidWord(cube_best_str));
|
||||
if (cube_best_bigram_cost_valid) {
|
||||
// bigram cost of Cube string
|
||||
features->push_back(cube_best_bigram_cost);
|
||||
}
|
||||
// case-insensitive string comparison, including punctuation
|
||||
int compare_nocase_punc = CompareStrings(cube_best_str,
|
||||
tess_str, false, true);
|
||||
features->push_back(compare_nocase_punc == 0);
|
||||
// case-sensitive string comparison, ignoring punctuation
|
||||
int compare_case_nopunc = CompareStrings(cube_best_str,
|
||||
tess_str, true, false);
|
||||
features->push_back(compare_case_nopunc == 0);
|
||||
// case-insensitive string comparison, ignoring punctuation
|
||||
int compare_nocase_nopunc = CompareStrings(cube_best_str,
|
||||
tess_str, true, true);
|
||||
features->push_back(compare_nocase_nopunc == 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
// The CubeObject parameter is used for 2 purposes: 1) to retrieve
|
||||
// cube's alt list, and 2) to compute cube's word cost for the
|
||||
// tesseract result. The call to CubeObject::WordCost() modifies
|
||||
// the object's alternate list, so previous state will be lost.
|
||||
// Two-argument overload: pulls the alternate list out of cube_obj
// (running recognition if needed) and forwards to the three-argument
// CombineResults. The CubeObject serves two purposes: 1) supplying the
// alt list, and 2) costing the Tesseract result -- WordCost() will
// clobber the object's previous alternate-list state.
float TesseractCubeCombiner::CombineResults(WERD_RES *tess_res,
                                            CubeObject *cube_obj) {
  // Without a combiner net or a cube object, Tesseract wins outright.
  if (combiner_net_ == NULL || cube_obj == NULL) {
    tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
            "Cube objects not initialized; defaulting to Tesseract\n");
    return 1.0;
  }

  // Use the cached alternate list if present; otherwise recognize now.
  WordAltList *alt_list = cube_obj->AlternateList();
  if (alt_list == NULL)
    alt_list = cube_obj->RecognizeWord();
  // An empty alternate list also means Tesseract wins outright.
  if (alt_list == NULL || alt_list->AltCount() <= 0) {
    tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
            "Cube returned no results; defaulting to Tesseract\n");
    return 1.0;
  }
  return CombineResults(tess_res, cube_obj, alt_list);
}
|
||||
|
||||
// The alt_list parameter is expected to have been extracted from the
|
||||
// CubeObject that recognized the word to be combined. The cube_obj
|
||||
// parameter passed may be either same instance or a separate instance to
|
||||
// be used only by the combiner. In both cases, its alternate
|
||||
// list will be modified by an internal call to RecognizeWord().
|
||||
// Three-argument overload: cube_alt_list is expected to come from the
// CubeObject that recognized the word; cube_obj may be the same instance
// or a dedicated one -- either way its alternate list is modified by an
// internal RecognizeWord() call. Returns the probability that Tesseract's
// answer should win (1.0 = always Tesseract).
float TesseractCubeCombiner::CombineResults(WERD_RES *tess_res,
                                            CubeObject *cube_obj,
                                            WordAltList *cube_alt_list) {
  // If any required piece is missing, Tesseract wins with probability 1.0.
  bool cube_unusable = combiner_net_ == NULL || cube_obj == NULL ||
      cube_alt_list == NULL || cube_alt_list->AltCount() <= 0;
  if (cube_unusable) {
    tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
            "Cube result cannot be retrieved; defaulting to Tesseract\n");
    return 1.0;
  }

  // Tesseract's result string and its confidence.
  string tess_str = tess_res->best_choice->unichar_string().string();
  // Map certainty [-20.0, 0.0] to confidence [0, 100]
  int scaled_certainty =
      static_cast<int>(100 + (5 * tess_res->best_choice->certainty()));
  int tess_confidence = MIN(100, MAX(1, scaled_certainty));

  // Build the combiner features; if that fails or both engines agree,
  // Tesseract wins with probability 1.0.
  vector<double> feature_vec;
  bool identical;
  bool features_ok = ComputeCombinerFeatures(tess_str, tess_confidence,
                                             cube_obj, cube_alt_list,
                                             &feature_vec, &identical);
  if (!features_ok || identical)
    return 1.0;

  // Classify the feature vector; output[1] is the probability of the
  // tesseract class.
  double outputs[2];
  if (!combiner_net_->FeedForward(&feature_vec[0], outputs))
    return 1.0;
  return outputs[1];
}
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
/**********************************************************************
|
||||
* File: tesseract_cube_combiner.h
|
||||
* Description: Declaration of the Tesseract & Cube results combiner Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The TesseractCubeCombiner class provides the functionality of combining
|
||||
// the recognition results of Tesseract and Cube at the word level
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H
|
||||
#define TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "pageres.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class CubeObject;
|
||||
class NeuralNet;
|
||||
class CubeRecoContext;
|
||||
class WordAltList;
|
||||
|
||||
class TesseractCubeCombiner {
|
||||
public:
|
||||
explicit TesseractCubeCombiner(CubeRecoContext *cube_cntxt);
|
||||
virtual ~TesseractCubeCombiner();
|
||||
|
||||
// There are 2 public methods for combining the results of tesseract
|
||||
// and cube. Both return the probability that the Tesseract result is
|
||||
// correct. The difference between the two interfaces is in how the
|
||||
// passed-in CubeObject is used.
|
||||
|
||||
// The CubeObject parameter is used for 2 purposes: 1) to retrieve
|
||||
// cube's alt list, and 2) to compute cube's word cost for the
|
||||
// tesseract result. Both uses may modify the state of the
|
||||
// CubeObject (including the BeamSearch state) with a call to
|
||||
// RecognizeWord().
|
||||
float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj);
|
||||
|
||||
// The alt_list parameter is expected to have been extracted from the
|
||||
// CubeObject that recognized the word to be combined. The cube_obj
|
||||
// parameter passed in is a separate instance to be used only by
|
||||
// the combiner.
|
||||
float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj,
|
||||
WordAltList *alt_list);
|
||||
|
||||
// Public method for computing the combiner features. The agreement
|
||||
// output parameter will be true if both answers are identical,
|
||||
// false otherwise. Modifies the cube_alt_list, so no assumptions
|
||||
// should be made about its state upon return.
|
||||
bool ComputeCombinerFeatures(const std::string &tess_res,
|
||||
int tess_confidence,
|
||||
CubeObject *cube_obj,
|
||||
WordAltList *cube_alt_list,
|
||||
std::vector<double> *features,
|
||||
bool *agreement);
|
||||
|
||||
// Is the word valid according to Tesseract's language model
|
||||
bool ValidWord(const std::string &str);
|
||||
|
||||
// Loads the combiner neural network from file, using cube_cntxt_
|
||||
// to find path.
|
||||
bool LoadCombinerNet();
|
||||
private:
|
||||
// Normalize a UTF-8 string. Converts the UTF-8 string to UTF32 and optionally
|
||||
// strips punc and/or normalizes case and then converts back
|
||||
std::string NormalizeString(const std::string &str, bool remove_punc, bool norm_case);
|
||||
|
||||
// Compares 2 strings after optionally normalizing them and or stripping
|
||||
// punctuation
|
||||
int CompareStrings(const std::string &str1, const std::string &str2, bool ignore_punc,
|
||||
bool norm_case);
|
||||
|
||||
NeuralNet *combiner_net_; // pointer to the combiner NeuralNet object
|
||||
CubeRecoContext *cube_cntxt_; // used for language ID and data paths
|
||||
};
|
||||
}
|
||||
|
||||
#endif // TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H
|
|
@ -0,0 +1,769 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: tesseractclass.cpp
|
||||
// Description: The Tesseract class. It holds/owns everything needed
|
||||
// to run Tesseract on a single language, and also a set of
|
||||
// sub-Tesseracts to run sub-languages. For thread safety, *every*
|
||||
// variable that was previously global or static (except for
|
||||
// constant data, and some visual debugging flags) has been moved
|
||||
// in here, directly, or indirectly.
|
||||
// This makes it safe to run multiple Tesseracts in different
|
||||
// threads in parallel, and keeps the different language
|
||||
// instances separate.
|
||||
// Some global functions remain, but they are isolated re-entrant
|
||||
// functions that operate on their arguments. Functions that work
|
||||
// on variable data have been moved to an appropriate class based
|
||||
// mostly on the directory hierarchy. For more information see
|
||||
// slide 6 of "2ArchitectureAndDataStructures" in
|
||||
// https://drive.google.com/file/d/0B7l10Bj_LprhbUlIUFlCdGtDYkE/edit?usp=sharing
|
||||
// Some global data and related functions still exist in the
|
||||
// training-related code, but they don't interfere with normal
|
||||
// recognition operation.
|
||||
// Author: Ray Smith
|
||||
// Created: Fri Mar 07 08:17:01 PST 2008
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "tesseractclass.h"
|
||||
|
||||
#include "allheaders.h"
|
||||
#ifndef NO_CUBE_BUILD
|
||||
#include "cube_reco_context.h"
|
||||
#endif
|
||||
#include "edgblob.h"
|
||||
#include "equationdetect.h"
|
||||
#include "globals.h"
|
||||
#ifndef NO_CUBE_BUILD
|
||||
#include "tesseract_cube_combiner.h"
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
Tesseract::Tesseract()
|
||||
: BOOL_MEMBER(tessedit_resegment_from_boxes, false,
|
||||
"Take segmentation and labeling from box file",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_resegment_from_line_boxes, false,
|
||||
"Conversion of word/line box file to char box file",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_train_from_boxes, false,
|
||||
"Generate training data from boxed chars", this->params()),
|
||||
BOOL_MEMBER(tessedit_make_boxes_from_boxes, false,
|
||||
"Generate more boxes from boxed chars", this->params()),
|
||||
BOOL_MEMBER(tessedit_dump_pageseg_images, false,
|
||||
"Dump intermediate images made during page segmentation",
|
||||
this->params()),
|
||||
// The default for pageseg_mode is the old behaviour, so as not to
|
||||
// upset anything that relies on that.
|
||||
INT_MEMBER(
|
||||
tessedit_pageseg_mode, PSM_SINGLE_BLOCK,
|
||||
"Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block,"
|
||||
" 5=line, 6=word, 7=char"
|
||||
" (Values from PageSegMode enum in publictypes.h)",
|
||||
this->params()),
|
||||
INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY,
|
||||
"Which OCR engine(s) to run (Tesseract, Cube, both)."
|
||||
" Defaults to loading and running only Tesseract"
|
||||
" (no Cube,no combiner)."
|
||||
" Values from OcrEngineMode enum in tesseractclass.h)",
|
||||
this->params()),
|
||||
STRING_MEMBER(tessedit_char_blacklist, "",
|
||||
"Blacklist of chars not to recognize", this->params()),
|
||||
STRING_MEMBER(tessedit_char_whitelist, "",
|
||||
"Whitelist of chars to recognize", this->params()),
|
||||
STRING_MEMBER(tessedit_char_unblacklist, "",
|
||||
"List of chars to override tessedit_char_blacklist",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_ambigs_training, false,
|
||||
"Perform training for ambiguities", this->params()),
|
||||
INT_MEMBER(pageseg_devanagari_split_strategy,
|
||||
tesseract::ShiroRekhaSplitter::NO_SPLIT,
|
||||
"Whether to use the top-line splitting process for Devanagari "
|
||||
"documents while performing page-segmentation.",
|
||||
this->params()),
|
||||
INT_MEMBER(ocr_devanagari_split_strategy,
|
||||
tesseract::ShiroRekhaSplitter::NO_SPLIT,
|
||||
"Whether to use the top-line splitting process for Devanagari "
|
||||
"documents while performing ocr.",
|
||||
this->params()),
|
||||
STRING_MEMBER(tessedit_write_params_to_file, "",
|
||||
"Write all parameters to the given file.", this->params()),
|
||||
BOOL_MEMBER(tessedit_adaption_debug, false,
|
||||
"Generate and print debug"
|
||||
" information for adaption",
|
||||
this->params()),
|
||||
INT_MEMBER(bidi_debug, 0, "Debug level for BiDi", this->params()),
|
||||
INT_MEMBER(applybox_debug, 1, "Debug level", this->params()),
|
||||
INT_MEMBER(applybox_page, 0, "Page number to apply boxes from",
|
||||
this->params()),
|
||||
STRING_MEMBER(applybox_exposure_pattern, ".exp",
|
||||
"Exposure value follows"
|
||||
" this pattern in the image filename. The name of the image"
|
||||
" files are expected to be in the form"
|
||||
" [lang].[fontname].exp[num].tif",
|
||||
this->params()),
|
||||
BOOL_MEMBER(applybox_learn_chars_and_char_frags_mode, false,
|
||||
"Learn both character fragments (as is done in the"
|
||||
" special low exposure mode) as well as unfragmented"
|
||||
" characters.",
|
||||
this->params()),
|
||||
BOOL_MEMBER(applybox_learn_ngrams_mode, false,
|
||||
"Each bounding box"
|
||||
" is assumed to contain ngrams. Only learn the ngrams"
|
||||
" whose outlines overlap horizontally.",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_display_outwords, false, "Draw output words",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_dump_choices, false, "Dump char choices",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_timing_debug, false, "Print timing stats",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_fix_fuzzy_spaces, true,
|
||||
"Try to improve fuzzy spaces", this->params()),
|
||||
BOOL_MEMBER(tessedit_unrej_any_wd, false,
|
||||
"Don't bother with word plausibility", this->params()),
|
||||
BOOL_MEMBER(tessedit_fix_hyphens, true, "Crunch double hyphens?",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_redo_xheight, true, "Check/Correct x-height",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_enable_doc_dict, true,
|
||||
"Add words to the document dictionary", this->params()),
|
||||
BOOL_MEMBER(tessedit_debug_fonts, false, "Output font info per char",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_debug_block_rejection, false, "Block and Row stats",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_enable_bigram_correction, true,
|
||||
"Enable correction based on the word bigram dictionary.",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_enable_dict_correction, false,
|
||||
"Enable single word correction based on the dictionary.",
|
||||
this->params()),
|
||||
INT_MEMBER(tessedit_bigram_debug, 0,
|
||||
"Amount of debug output for bigram correction.",
|
||||
this->params()),
|
||||
BOOL_MEMBER(enable_noise_removal, true,
|
||||
"Remove and conditionally reassign small outlines when they"
|
||||
" confuse layout analysis, determining diacritics vs noise",
|
||||
this->params()),
|
||||
INT_MEMBER(debug_noise_removal, 0, "Debug reassignment of small outlines",
|
||||
this->params()),
|
||||
// Worst (min) certainty, for which a diacritic is allowed to make the
|
||||
// base
|
||||
// character worse and still be included.
|
||||
double_MEMBER(noise_cert_basechar, -8.0,
|
||||
"Hingepoint for base char certainty", this->params()),
|
||||
// Worst (min) certainty, for which a non-overlapping diacritic is allowed
|
||||
// to make the base character worse and still be included.
|
||||
double_MEMBER(noise_cert_disjoint, -1.0,
|
||||
"Hingepoint for disjoint certainty", this->params()),
|
||||
// Worst (min) certainty, for which a diacritic is allowed to make a new
|
||||
// stand-alone blob.
|
||||
double_MEMBER(noise_cert_punc, -3.0,
|
||||
"Threshold for new punc char certainty", this->params()),
|
||||
// Factor of certainty margin for adding diacritics to not count as worse.
|
||||
double_MEMBER(noise_cert_factor, 0.375,
|
||||
"Scaling on certainty diff from Hingepoint",
|
||||
this->params()),
|
||||
INT_MEMBER(noise_maxperblob, 8, "Max diacritics to apply to a blob",
|
||||
this->params()),
|
||||
INT_MEMBER(noise_maxperword, 16, "Max diacritics to apply to a word",
|
||||
this->params()),
|
||||
INT_MEMBER(debug_x_ht_level, 0, "Reestimate debug", this->params()),
|
||||
BOOL_MEMBER(debug_acceptable_wds, false, "Dump word pass/fail chk",
|
||||
this->params()),
|
||||
STRING_MEMBER(chs_leading_punct, "('`\"", "Leading punctuation",
|
||||
this->params()),
|
||||
STRING_MEMBER(chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation",
|
||||
this->params()),
|
||||
STRING_MEMBER(chs_trailing_punct2, ")'`\"", "2nd Trailing punctuation",
|
||||
this->params()),
|
||||
double_MEMBER(quality_rej_pc, 0.08,
|
||||
"good_quality_doc lte rejection limit", this->params()),
|
||||
double_MEMBER(quality_blob_pc, 0.0,
|
||||
"good_quality_doc gte good blobs limit", this->params()),
|
||||
double_MEMBER(quality_outline_pc, 1.0,
|
||||
"good_quality_doc lte outline error limit", this->params()),
|
||||
double_MEMBER(quality_char_pc, 0.95,
|
||||
"good_quality_doc gte good char limit", this->params()),
|
||||
INT_MEMBER(quality_min_initial_alphas_reqd, 2, "alphas in a good word",
|
||||
this->params()),
|
||||
INT_MEMBER(tessedit_tess_adaption_mode, 0x27,
|
||||
"Adaptation decision algorithm for tess", this->params()),
|
||||
BOOL_MEMBER(tessedit_minimal_rej_pass1, false,
|
||||
"Do minimal rejection on pass 1 output", this->params()),
|
||||
BOOL_MEMBER(tessedit_test_adaption, false, "Test adaption criteria",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_matcher_log, false, "Log matcher activity",
|
||||
this->params()),
|
||||
INT_MEMBER(tessedit_test_adaption_mode, 3,
|
||||
"Adaptation decision algorithm for tess", this->params()),
|
||||
BOOL_MEMBER(test_pt, false, "Test for point", this->params()),
|
||||
double_MEMBER(test_pt_x, 99999.99, "xcoord", this->params()),
|
||||
double_MEMBER(test_pt_y, 99999.99, "ycoord", this->params()),
|
||||
INT_MEMBER(paragraph_debug_level, 0, "Print paragraph debug info.",
|
||||
this->params()),
|
||||
BOOL_MEMBER(paragraph_text_based, true,
|
||||
"Run paragraph detection on the post-text-recognition "
|
||||
"(more accurate)",
|
||||
this->params()),
|
||||
INT_MEMBER(cube_debug_level, 0, "Print cube debug info.", this->params()),
|
||||
STRING_MEMBER(outlines_odd, "%| ", "Non standard number of outlines",
|
||||
this->params()),
|
||||
STRING_MEMBER(outlines_2, "ij!?%\":;", "Non standard number of outlines",
|
||||
this->params()),
|
||||
BOOL_MEMBER(docqual_excuse_outline_errs, false,
|
||||
"Allow outline errs in unrejection?", this->params()),
|
||||
BOOL_MEMBER(tessedit_good_quality_unrej, true,
|
||||
"Reduce rejection on good docs", this->params()),
|
||||
BOOL_MEMBER(tessedit_use_reject_spaces, true, "Reject spaces?",
|
||||
this->params()),
|
||||
double_MEMBER(tessedit_reject_doc_percent, 65.00,
|
||||
"%rej allowed before rej whole doc", this->params()),
|
||||
double_MEMBER(tessedit_reject_block_percent, 45.00,
|
||||
"%rej allowed before rej whole block", this->params()),
|
||||
double_MEMBER(tessedit_reject_row_percent, 40.00,
|
||||
"%rej allowed before rej whole row", this->params()),
|
||||
double_MEMBER(tessedit_whole_wd_rej_row_percent, 70.00,
|
||||
"Number of row rejects in whole word rejects"
|
||||
"which prevents whole row rejection",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_preserve_blk_rej_perfect_wds, true,
|
||||
"Only rej partially rejected words in block rejection",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_preserve_row_rej_perfect_wds, true,
|
||||
"Only rej partially rejected words in row rejection",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_dont_blkrej_good_wds, false,
|
||||
"Use word segmentation quality metric", this->params()),
|
||||
BOOL_MEMBER(tessedit_dont_rowrej_good_wds, false,
|
||||
"Use word segmentation quality metric", this->params()),
|
||||
INT_MEMBER(tessedit_preserve_min_wd_len, 2,
|
||||
"Only preserve wds longer than this", this->params()),
|
||||
BOOL_MEMBER(tessedit_row_rej_good_docs, true,
|
||||
"Apply row rejection to good docs", this->params()),
|
||||
double_MEMBER(tessedit_good_doc_still_rowrej_wd, 1.1,
|
||||
"rej good doc wd if more than this fraction rejected",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_reject_bad_qual_wds, true,
|
||||
"Reject all bad quality wds", this->params()),
|
||||
BOOL_MEMBER(tessedit_debug_doc_rejection, false, "Page stats",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_debug_quality_metrics, false,
|
||||
"Output data to debug file", this->params()),
|
||||
BOOL_MEMBER(bland_unrej, false, "unrej potential with no chekcs",
|
||||
this->params()),
|
||||
double_MEMBER(quality_rowrej_pc, 1.1,
|
||||
"good_quality_doc gte good char limit", this->params()),
|
||||
BOOL_MEMBER(unlv_tilde_crunching, true,
|
||||
"Mark v.bad words for tilde crunch", this->params()),
|
||||
BOOL_MEMBER(hocr_font_info, false, "Add font info to hocr output",
|
||||
this->params()),
|
||||
BOOL_MEMBER(crunch_early_merge_tess_fails, true, "Before word crunch?",
|
||||
this->params()),
|
||||
BOOL_MEMBER(crunch_early_convert_bad_unlv_chs, false,
|
||||
"Take out ~^ early?", this->params()),
|
||||
double_MEMBER(crunch_terrible_rating, 80.0, "crunch rating lt this",
|
||||
this->params()),
|
||||
BOOL_MEMBER(crunch_terrible_garbage, true, "As it says", this->params()),
|
||||
double_MEMBER(crunch_poor_garbage_cert, -9.0,
|
||||
"crunch garbage cert lt this", this->params()),
|
||||
double_MEMBER(crunch_poor_garbage_rate, 60,
|
||||
"crunch garbage rating lt this", this->params()),
|
||||
double_MEMBER(crunch_pot_poor_rate, 40, "POTENTIAL crunch rating lt this",
|
||||
this->params()),
|
||||
double_MEMBER(crunch_pot_poor_cert, -8.0, "POTENTIAL crunch cert lt this",
|
||||
this->params()),
|
||||
BOOL_MEMBER(crunch_pot_garbage, true, "POTENTIAL crunch garbage",
|
||||
this->params()),
|
||||
double_MEMBER(crunch_del_rating, 60, "POTENTIAL crunch rating lt this",
|
||||
this->params()),
|
||||
double_MEMBER(crunch_del_cert, -10.0, "POTENTIAL crunch cert lt this",
|
||||
this->params()),
|
||||
double_MEMBER(crunch_del_min_ht, 0.7, "Del if word ht lt xht x this",
|
||||
this->params()),
|
||||
double_MEMBER(crunch_del_max_ht, 3.0, "Del if word ht gt xht x this",
|
||||
this->params()),
|
||||
double_MEMBER(crunch_del_min_width, 3.0,
|
||||
"Del if word width lt xht x this", this->params()),
|
||||
double_MEMBER(crunch_del_high_word, 1.5,
|
||||
"Del if word gt xht x this above bl", this->params()),
|
||||
double_MEMBER(crunch_del_low_word, 0.5,
|
||||
"Del if word gt xht x this below bl", this->params()),
|
||||
double_MEMBER(crunch_small_outlines_size, 0.6, "Small if lt xht x this",
|
||||
this->params()),
|
||||
INT_MEMBER(crunch_rating_max, 10, "For adj length in rating per ch",
|
||||
this->params()),
|
||||
INT_MEMBER(crunch_pot_indicators, 1,
|
||||
"How many potential indicators needed", this->params()),
|
||||
BOOL_MEMBER(crunch_leave_ok_strings, true, "Don't touch sensible strings",
|
||||
this->params()),
|
||||
BOOL_MEMBER(crunch_accept_ok, true, "Use acceptability in okstring",
|
||||
this->params()),
|
||||
BOOL_MEMBER(crunch_leave_accept_strings, false,
|
||||
"Don't pot crunch sensible strings", this->params()),
|
||||
BOOL_MEMBER(crunch_include_numerals, false, "Fiddle alpha figures",
|
||||
this->params()),
|
||||
INT_MEMBER(crunch_leave_lc_strings, 4,
|
||||
"Don't crunch words with long lower case strings",
|
||||
this->params()),
|
||||
INT_MEMBER(crunch_leave_uc_strings, 4,
|
||||
"Don't crunch words with long lower case strings",
|
||||
this->params()),
|
||||
INT_MEMBER(crunch_long_repetitions, 3,
|
||||
"Crunch words with long repetitions", this->params()),
|
||||
INT_MEMBER(crunch_debug, 0, "As it says", this->params()),
|
||||
INT_MEMBER(fixsp_non_noise_limit, 1,
|
||||
"How many non-noise blbs either side?", this->params()),
|
||||
double_MEMBER(fixsp_small_outlines_size, 0.28, "Small if lt xht x this",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_prefer_joined_punct, false,
|
||||
"Reward punctation joins", this->params()),
|
||||
INT_MEMBER(fixsp_done_mode, 1, "What constitues done for spacing",
|
||||
this->params()),
|
||||
INT_MEMBER(debug_fix_space_level, 0, "Contextual fixspace debug",
|
||||
this->params()),
|
||||
STRING_MEMBER(numeric_punctuation, ".,",
|
||||
"Punct. chs expected WITHIN numbers", this->params()),
|
||||
INT_MEMBER(x_ht_acceptance_tolerance, 8,
|
||||
"Max allowed deviation of blob top outside of font data",
|
||||
this->params()),
|
||||
INT_MEMBER(x_ht_min_change, 8,
|
||||
"Min change in xht before actually trying it", this->params()),
|
||||
INT_MEMBER(superscript_debug, 0,
|
||||
"Debug level for sub & superscript fixer", this->params()),
|
||||
double_MEMBER(
|
||||
superscript_worse_certainty, 2.0,
|
||||
"How many times worse "
|
||||
"certainty does a superscript position glyph need to be for "
|
||||
"us to try classifying it as a char with a different "
|
||||
"baseline?",
|
||||
this->params()),
|
||||
double_MEMBER(
|
||||
superscript_bettered_certainty, 0.97,
|
||||
"What reduction in "
|
||||
"badness do we think sufficient to choose a superscript "
|
||||
"over what we'd thought. For example, a value of 0.6 means "
|
||||
"we want to reduce badness of certainty by at least 40%",
|
||||
this->params()),
|
||||
double_MEMBER(superscript_scaledown_ratio, 0.4,
|
||||
"A superscript scaled down more than this is unbelievably "
|
||||
"small. For example, 0.3 means we expect the font size to "
|
||||
"be no smaller than 30% of the text line font size.",
|
||||
this->params()),
|
||||
double_MEMBER(subscript_max_y_top, 0.5,
|
||||
"Maximum top of a character measured as a multiple of "
|
||||
"x-height above the baseline for us to reconsider whether "
|
||||
"it's a subscript.",
|
||||
this->params()),
|
||||
double_MEMBER(superscript_min_y_bottom, 0.3,
|
||||
"Minimum bottom of a character measured as a multiple of "
|
||||
"x-height above the baseline for us to reconsider whether "
|
||||
"it's a superscript.",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_write_block_separators, false,
|
||||
"Write block separators in output", this->params()),
|
||||
BOOL_MEMBER(tessedit_write_rep_codes, false, "Write repetition char code",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_write_unlv, false, "Write .unlv output file",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_create_txt, false, "Write .txt output file",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_create_hocr, false, "Write .html hOCR output file",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_create_tsv, false, "Write .tsv output file",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_create_pdf, false, "Write .pdf output file",
|
||||
this->params()),
|
||||
BOOL_MEMBER(textonly_pdf, false, "Create PDF with only one invisible text layer",
|
||||
this->params()),
|
||||
STRING_MEMBER(unrecognised_char, "|",
|
||||
"Output char for unidentified blobs", this->params()),
|
||||
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),
|
||||
INT_MEMBER(suspect_space_level, 100,
|
||||
"Min suspect level for rejecting spaces", this->params()),
|
||||
INT_MEMBER(suspect_short_words, 2,
|
||||
"Don't suspect dict wds longer than this", this->params()),
|
||||
BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected",
|
||||
this->params()),
|
||||
double_MEMBER(suspect_rating_per_ch, 999.9,
|
||||
"Don't touch bad rating limit", this->params()),
|
||||
double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_minimal_rejection, false,
|
||||
"Only reject tess failures", this->params()),
|
||||
BOOL_MEMBER(tessedit_zero_rejection, false, "Don't reject ANYTHING",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_word_for_word, false,
|
||||
"Make output have exactly one word per WERD", this->params()),
|
||||
BOOL_MEMBER(tessedit_zero_kelvin_rejection, false,
|
||||
"Don't reject ANYTHING AT ALL", this->params()),
|
||||
BOOL_MEMBER(tessedit_consistent_reps, true,
|
||||
"Force all rep chars the same", this->params()),
|
||||
INT_MEMBER(tessedit_reject_mode, 0, "Rejection algorithm",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_rejection_debug, false, "Adaption debug",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_flip_0O, true, "Contextual 0O O0 flips",
|
||||
this->params()),
|
||||
double_MEMBER(tessedit_lower_flip_hyphen, 1.5,
|
||||
"Aspect ratio dot/hyphen test", this->params()),
|
||||
double_MEMBER(tessedit_upper_flip_hyphen, 1.8,
|
||||
"Aspect ratio dot/hyphen test", this->params()),
|
||||
BOOL_MEMBER(rej_trust_doc_dawg, false,
|
||||
"Use DOC dawg in 11l conf. detector", this->params()),
|
||||
BOOL_MEMBER(rej_1Il_use_dict_word, false, "Use dictword test",
|
||||
this->params()),
|
||||
BOOL_MEMBER(rej_1Il_trust_permuter_type, true, "Don't double check",
|
||||
this->params()),
|
||||
BOOL_MEMBER(rej_use_tess_accepted, true, "Individual rejection control",
|
||||
this->params()),
|
||||
BOOL_MEMBER(rej_use_tess_blanks, true, "Individual rejection control",
|
||||
this->params()),
|
||||
BOOL_MEMBER(rej_use_good_perm, true, "Individual rejection control",
|
||||
this->params()),
|
||||
BOOL_MEMBER(rej_use_sensible_wd, false, "Extend permuter check",
|
||||
this->params()),
|
||||
BOOL_MEMBER(rej_alphas_in_number_perm, false, "Extend permuter check",
|
||||
this->params()),
|
||||
double_MEMBER(rej_whole_of_mostly_reject_word_fract, 0.85,
|
||||
"if >this fract", this->params()),
|
||||
INT_MEMBER(tessedit_image_border, 2, "Rej blbs near image edge limit",
|
||||
this->params()),
|
||||
STRING_MEMBER(ok_repeated_ch_non_alphanum_wds, "-?*\075",
|
||||
"Allow NN to unrej", this->params()),
|
||||
STRING_MEMBER(conflict_set_I_l_1, "Il1[]", "Il1 conflict set",
|
||||
this->params()),
|
||||
INT_MEMBER(min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_create_boxfile, false, "Output text with boxes",
|
||||
this->params()),
|
||||
INT_MEMBER(tessedit_page_number, -1,
|
||||
"-1 -> All pages"
|
||||
" , else specifc page to process",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_write_images, false,
|
||||
"Capture the image from the IPE", this->params()),
|
||||
BOOL_MEMBER(interactive_display_mode, false, "Run interactively?",
|
||||
this->params()),
|
||||
STRING_MEMBER(file_type, ".tif", "Filename extension", this->params()),
|
||||
BOOL_MEMBER(tessedit_override_permuter, true, "According to dict_word",
|
||||
this->params()),
|
||||
INT_MEMBER(tessdata_manager_debug_level, 0,
|
||||
"Debug level for"
|
||||
" TessdataManager functions.",
|
||||
this->params()),
|
||||
STRING_MEMBER(tessedit_load_sublangs, "",
|
||||
"List of languages to load with this one", this->params()),
|
||||
BOOL_MEMBER(tessedit_use_primary_params_model, false,
|
||||
"In multilingual mode use params model of the"
|
||||
" primary language",
|
||||
this->params()),
|
||||
double_MEMBER(min_orientation_margin, 7.0,
|
||||
"Min acceptable orientation margin", this->params()),
|
||||
BOOL_MEMBER(textord_tabfind_show_vlines, false, "Debug line finding",
|
||||
this->params()),
|
||||
BOOL_MEMBER(textord_use_cjk_fp_model, FALSE, "Use CJK fixed pitch model",
|
||||
this->params()),
|
||||
BOOL_MEMBER(poly_allow_detailed_fx, false,
|
||||
"Allow feature extractors to see the original outline",
|
||||
this->params()),
|
||||
BOOL_INIT_MEMBER(tessedit_init_config_only, false,
|
||||
"Only initialize with the config file. Useful if the "
|
||||
"instance is not going to be used for OCR but say only "
|
||||
"for layout analysis.",
|
||||
this->params()),
|
||||
BOOL_MEMBER(textord_equation_detect, false, "Turn on equation detector",
|
||||
this->params()),
|
||||
BOOL_MEMBER(textord_tabfind_vertical_text, true,
|
||||
"Enable vertical detection", this->params()),
|
||||
BOOL_MEMBER(textord_tabfind_force_vertical_text, false,
|
||||
"Force using vertical text page mode", this->params()),
|
||||
double_MEMBER(
|
||||
textord_tabfind_vertical_text_ratio, 0.5,
|
||||
"Fraction of textlines deemed vertical to use vertical page "
|
||||
"mode",
|
||||
this->params()),
|
||||
double_MEMBER(
|
||||
textord_tabfind_aligned_gap_fraction, 0.75,
|
||||
"Fraction of height used as a minimum gap for aligned blobs.",
|
||||
this->params()),
|
||||
INT_MEMBER(tessedit_parallelize, 0, "Run in parallel where possible",
|
||||
this->params()),
|
||||
BOOL_MEMBER(preserve_interword_spaces, false,
|
||||
"Preserve multiple interword spaces", this->params()),
|
||||
BOOL_MEMBER(include_page_breaks, FALSE,
|
||||
"Include page separator string in output text after each "
|
||||
"image/page.",
|
||||
this->params()),
|
||||
STRING_MEMBER(page_separator, "\f",
|
||||
"Page separator (default is form feed control character)",
|
||||
this->params()),
|
||||
|
||||
// The following parameters were deprecated and removed from their
|
||||
// original
|
||||
// locations. The parameters are temporarily kept here to give Tesseract
|
||||
// users a chance to updated their [lang].traineddata and config files
|
||||
// without introducing failures during Tesseract initialization.
|
||||
// TODO(ocr-team): remove these parameters from the code once we are
|
||||
// reasonably sure that Tesseract users have updated their data files.
|
||||
//
|
||||
// BEGIN DEPRECATED PARAMETERS
|
||||
BOOL_MEMBER(textord_tabfind_vertical_horizontal_mix, true,
|
||||
"find horizontal lines such as headers in vertical page mode",
|
||||
this->params()),
|
||||
INT_MEMBER(tessedit_ok_mode, 5, "Acceptance decision algorithm",
|
||||
this->params()),
|
||||
BOOL_INIT_MEMBER(load_fixed_length_dawgs, true,
|
||||
"Load fixed length dawgs"
|
||||
" (e.g. for non-space delimited languages)",
|
||||
this->params()),
|
||||
INT_MEMBER(segment_debug, 0, "Debug the whole segmentation process",
|
||||
this->params()),
|
||||
BOOL_MEMBER(permute_debug, 0, "Debug char permutation process",
|
||||
this->params()),
|
||||
double_MEMBER(bestrate_pruning_factor, 2.0,
|
||||
"Multiplying factor of"
|
||||
" current best rate to prune other hypotheses",
|
||||
this->params()),
|
||||
BOOL_MEMBER(permute_script_word, 0,
|
||||
"Turn on word script consistency permuter", this->params()),
|
||||
BOOL_MEMBER(segment_segcost_rating, 0,
|
||||
"incorporate segmentation cost in word rating?",
|
||||
this->params()),
|
||||
double_MEMBER(segment_reward_script, 0.95,
|
||||
"Score multipler for script consistency within a word. "
|
||||
"Being a 'reward' factor, it should be <= 1. "
|
||||
"Smaller value implies bigger reward.",
|
||||
this->params()),
|
||||
BOOL_MEMBER(permute_fixed_length_dawg, 0,
|
||||
"Turn on fixed-length phrasebook search permuter",
|
||||
this->params()),
|
||||
BOOL_MEMBER(permute_chartype_word, 0,
|
||||
"Turn on character type (property) consistency permuter",
|
||||
this->params()),
|
||||
double_MEMBER(segment_reward_chartype, 0.97,
|
||||
"Score multipler for char type consistency within a word. ",
|
||||
this->params()),
|
||||
double_MEMBER(segment_reward_ngram_best_choice, 0.99,
|
||||
"Score multipler for ngram permuter's best choice"
|
||||
" (only used in the Han script path).",
|
||||
this->params()),
|
||||
BOOL_MEMBER(ngram_permuter_activated, false,
|
||||
"Activate character-level n-gram-based permuter",
|
||||
this->params()),
|
||||
BOOL_MEMBER(permute_only_top, false, "Run only the top choice permuter",
|
||||
this->params()),
|
||||
INT_MEMBER(language_model_fixed_length_choices_depth, 3,
|
||||
"Depth of blob choice lists to explore"
|
||||
" when fixed length dawgs are on",
|
||||
this->params()),
|
||||
BOOL_MEMBER(use_new_state_cost, FALSE,
|
||||
"use new state cost heuristics for segmentation state"
|
||||
" evaluation",
|
||||
this->params()),
|
||||
double_MEMBER(heuristic_segcost_rating_base, 1.25,
|
||||
"base factor for adding segmentation cost into word rating."
|
||||
"It's a multiplying factor, the larger the value above 1, "
|
||||
"the bigger the effect of segmentation cost.",
|
||||
this->params()),
|
||||
double_MEMBER(heuristic_weight_rating, 1.0,
|
||||
"weight associated with char rating in combined cost of"
|
||||
"state",
|
||||
this->params()),
|
||||
double_MEMBER(heuristic_weight_width, 1000.0,
|
||||
"weight associated with width evidence in combined cost of"
|
||||
" state",
|
||||
this->params()),
|
||||
double_MEMBER(heuristic_weight_seamcut, 0.0,
|
||||
"weight associated with seam cut in combined cost of state",
|
||||
this->params()),
|
||||
double_MEMBER(heuristic_max_char_wh_ratio, 2.0,
|
||||
"max char width-to-height ratio allowed in segmentation",
|
||||
this->params()),
|
||||
BOOL_MEMBER(enable_new_segsearch, true,
|
||||
"Enable new segmentation search path.", this->params()),
|
||||
double_MEMBER(segsearch_max_fixed_pitch_char_wh_ratio, 2.0,
|
||||
"Maximum character width-to-height ratio for"
|
||||
" fixed-pitch fonts",
|
||||
this->params()),
|
||||
// END DEPRECATED PARAMETERS
|
||||
|
||||
backup_config_file_(NULL),
|
||||
pix_binary_(NULL),
|
||||
cube_binary_(NULL),
|
||||
pix_grey_(NULL),
|
||||
pix_original_(NULL),
|
||||
pix_thresholds_(NULL),
|
||||
source_resolution_(0),
|
||||
textord_(this),
|
||||
right_to_left_(false),
|
||||
scaled_color_(NULL),
|
||||
scaled_factor_(-1),
|
||||
deskew_(1.0f, 0.0f),
|
||||
reskew_(1.0f, 0.0f),
|
||||
most_recently_used_(this),
|
||||
font_table_size_(0),
|
||||
#ifndef NO_CUBE_BUILD
|
||||
cube_cntxt_(NULL),
|
||||
tess_cube_combiner_(NULL),
|
||||
#endif
|
||||
equ_detect_(NULL) {
|
||||
}
|
||||
|
||||
// Releases everything owned by this instance: images, per-language state,
// sub-language recognizers and (when built) the cube recognizer objects.
Tesseract::~Tesseract() {
  Clear();
  pixDestroy(&pix_original_);
  end_tesseract();
  sub_langs_.delete_data_pointers();
#ifndef NO_CUBE_BUILD
  // Tear down the cube objects; delete on a NULL pointer is a no-op,
  // so no guard is needed.
  delete cube_cntxt_;
  cube_cntxt_ = NULL;
  delete tess_cube_combiner_;
  tess_cube_combiner_ = NULL;
#endif
}
|
||||
|
||||
// Frees all per-page working images and resets per-page state, in this
// language and in every loaded sub-language.
void Tesseract::Clear() {
  pixDestroy(&pix_binary_);
  pixDestroy(&cube_binary_);
  pixDestroy(&pix_grey_);
  pixDestroy(&pix_thresholds_);
  pixDestroy(&scaled_color_);
  // Reset the rotation state to the identity.
  deskew_ = FCOORD(1.0f, 0.0f);
  reskew_ = FCOORD(1.0f, 0.0f);
  splitter_.Clear();
  scaled_factor_ = -1;
  for (int lang = 0; lang < sub_langs_.size(); ++lang) {
    sub_langs_[lang]->Clear();
  }
}
|
||||
|
||||
// Installs (or detaches, with NULL) the equation detector.
// Ownership of the detector stays with the caller.
// Fix: the original dereferenced detector unconditionally, so passing NULL
// to detach the detector crashed instead of clearing it.
void Tesseract::SetEquationDetect(EquationDetect* detector) {
  equ_detect_ = detector;
  if (equ_detect_ != NULL) {
    equ_detect_->SetLangTesseract(this);
  }
}
|
||||
|
||||
// Clear all memory of adaption for this and all subclassifiers.
|
||||
// Clears all adaption memory for this language and every sub-language.
void Tesseract::ResetAdaptiveClassifier() {
  ResetAdaptiveClassifierInternal();
  for (int lang = 0; lang < sub_langs_.size(); ++lang)
    sub_langs_[lang]->ResetAdaptiveClassifierInternal();
}
|
||||
|
||||
// Clear the document dictionary for this and all subclassifiers.
|
||||
// Clears the document dictionary for this language and every sub-language.
void Tesseract::ResetDocumentDictionary() {
  getDict().ResetDocumentDictionary();
  for (int lang = 0; lang < sub_langs_.size(); ++lang)
    sub_langs_[lang]->getDict().ResetDocumentDictionary();
}
|
||||
|
||||
// Pushes the configured character black/white/unblack lists into the
// unicharset of this language and of every loaded sub-language, so the
// lists apply to all classifiers.
void Tesseract::SetBlackAndWhitelist() {
  const char* blacklist = tessedit_char_blacklist.string();
  const char* whitelist = tessedit_char_whitelist.string();
  const char* unblacklist = tessedit_char_unblacklist.string();
  unicharset.set_black_and_whitelist(blacklist, whitelist, unblacklist);
  for (int lang = 0; lang < sub_langs_.size(); ++lang) {
    sub_langs_[lang]->unicharset.set_black_and_whitelist(
        blacklist, whitelist, unblacklist);
  }
}
|
||||
|
||||
// Perform steps to prepare underlying binary image/other data structures for
|
||||
// page segmentation.
|
||||
void Tesseract::PrepareForPageseg() {
|
||||
textord_.set_use_cjk_fp_model(textord_use_cjk_fp_model);
|
||||
pixDestroy(&cube_binary_);
|
||||
cube_binary_ = pixClone(pix_binary());
|
||||
// Find the max splitter strategy over all langs.
|
||||
ShiroRekhaSplitter::SplitStrategy max_pageseg_strategy =
|
||||
static_cast<ShiroRekhaSplitter::SplitStrategy>(
|
||||
static_cast<inT32>(pageseg_devanagari_split_strategy));
|
||||
for (int i = 0; i < sub_langs_.size(); ++i) {
|
||||
ShiroRekhaSplitter::SplitStrategy pageseg_strategy =
|
||||
static_cast<ShiroRekhaSplitter::SplitStrategy>(
|
||||
static_cast<inT32>(sub_langs_[i]->pageseg_devanagari_split_strategy));
|
||||
if (pageseg_strategy > max_pageseg_strategy)
|
||||
max_pageseg_strategy = pageseg_strategy;
|
||||
// Clone the cube image to all the sub langs too.
|
||||
pixDestroy(&sub_langs_[i]->cube_binary_);
|
||||
sub_langs_[i]->cube_binary_ = pixClone(pix_binary());
|
||||
pixDestroy(&sub_langs_[i]->pix_binary_);
|
||||
sub_langs_[i]->pix_binary_ = pixClone(pix_binary());
|
||||
}
|
||||
// Perform shiro-rekha (top-line) splitting and replace the current image by
|
||||
// the newly splitted image.
|
||||
splitter_.set_orig_pix(pix_binary());
|
||||
splitter_.set_pageseg_split_strategy(max_pageseg_strategy);
|
||||
if (splitter_.Split(true)) {
|
||||
ASSERT_HOST(splitter_.splitted_image());
|
||||
pixDestroy(&pix_binary_);
|
||||
pix_binary_ = pixClone(splitter_.splitted_image());
|
||||
}
|
||||
}
|
||||
|
||||
// Perform steps to prepare underlying binary image/other data structures for
|
||||
// OCR. The current segmentation is required by this method.
|
||||
// Note that this method resets pix_binary_ to the original binarized image,
|
||||
// which may be different from the image actually used for OCR depending on the
|
||||
// value of devanagari_ocr_split_strategy.
|
||||
// Prepares image/data structures for OCR given the current segmentation.
// Resets pix_binary_ to the original binarized image, which may differ from
// the image actually used for OCR depending on the devanagari OCR split
// strategy. When the pageseg and OCR strategies differ, the block list from
// the last SegmentImage call is refreshed with blobs from the OCR image.
void Tesseract::PrepareForTessOCR(BLOCK_LIST* block_list,
                                  Tesseract* osd_tess, OSResults* osr) {
  // Pick the maximum OCR split strategy over all languages.
  ShiroRekhaSplitter::SplitStrategy max_ocr_strategy =
      static_cast<ShiroRekhaSplitter::SplitStrategy>(
          static_cast<inT32>(ocr_devanagari_split_strategy));
  for (int lang = 0; lang < sub_langs_.size(); ++lang) {
    ShiroRekhaSplitter::SplitStrategy strategy =
        static_cast<ShiroRekhaSplitter::SplitStrategy>(
            static_cast<inT32>(sub_langs_[lang]->ocr_devanagari_split_strategy));
    if (strategy > max_ocr_strategy)
      max_ocr_strategy = strategy;
  }
  // Hand the existing segmentation to the splitter and run it for OCR.
  splitter_.set_segmentation_block_list(block_list);
  splitter_.set_ocr_split_strategy(max_ocr_strategy);
  bool split_for_ocr = splitter_.Split(false);
  // Restore pix_binary_ to the binarized original for future reference.
  ASSERT_HOST(splitter_.orig_pix());
  pixDestroy(&pix_binary_);
  pix_binary_ = pixClone(splitter_.orig_pix());
  if (splitter_.HasDifferentSplitStrategies()) {
    // Re-extract blobs from the image that will actually be used for OCR
    // and refresh the segmentation with them.
    BLOCK block("", TRUE, 0, 0, 0, 0, pixGetWidth(pix_binary_),
                pixGetHeight(pix_binary_));
    Pix* pix_for_ocr = split_for_ocr ? splitter_.splitted_image()
                                     : splitter_.orig_pix();
    extract_edges(pix_for_ocr, &block);
    splitter_.RefreshSegmentationWithNewBlobs(block.blob_list());
  }
  // The splitter is no longer needed; clear it to save memory.
  splitter_.Clear();
}
|
||||
|
||||
} // namespace tesseract
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,24 @@
|
|||
/**********************************************************************
|
||||
* File: tessvars.cpp (Formerly tessvars.c)
|
||||
* Description: Variables and other globals for tessedit.
|
||||
* Author: Ray Smith
|
||||
* Created: Mon Apr 13 13:13:23 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "tessvars.h"
|
||||
|
||||
FILE *debug_fp = stderr; // write debug stuff here
|
|
@ -0,0 +1,27 @@
|
|||
/**********************************************************************
|
||||
* File: tessvars.h (Formerly tessvars.h)
|
||||
* Description: Variables and other globals for tessedit.
|
||||
* Author: Ray Smith
|
||||
* Created: Mon Apr 13 13:13:23 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSVARS_H
|
||||
#define TESSVARS_H
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
extern FILE *debug_fp; // write debug stuff here
|
||||
#endif
|
|
@ -0,0 +1,330 @@
|
|||
/**********************************************************************
|
||||
* File: tfacepp.cpp (Formerly tface++.c)
|
||||
* Description: C++ side of the C/C++ Tess/Editor interface.
|
||||
* Author: Ray Smith
|
||||
* Created: Thu Apr 23 15:39:23 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable:4244) // Conversion warnings
|
||||
#pragma warning(disable:4305) // int/float warnings
|
||||
#pragma warning(disable:4800) // int/bool warnings
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "blamer.h"
|
||||
#include "errcode.h"
|
||||
#include "ratngs.h"
|
||||
#include "reject.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "werd.h"
|
||||
|
||||
#define MAX_UNDIVIDED_LENGTH 24
|
||||
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* recog_word
|
||||
*
|
||||
* Convert the word to tess form and pass it to the tess segmenter.
|
||||
* Convert the output back to editor form.
|
||||
**********************************************************************/
|
||||
namespace tesseract {
|
||||
// Converts the word to tess form, passes it to the tess segmenter via
// recog_word_recursive(), and converts the output back to editor form.
// Performs consistency checks on the result, optionally overrides the
// permuter type with a dictionary verdict, and flags empty/all-space
// results as tess failures.
void Tesseract::recog_word(WERD_RES *word) {
  // Optionally skip words that have no ground truth attached.
  if (wordrec_skip_no_truth_words &&
      (word->blamer_bundle == NULL ||
       word->blamer_bundle->incorrect_result_reason() == IRR_NO_TRUTH)) {
    if (classify_debug_level) tprintf("No truth for word - skipping\n");
    word->tess_failed = true;
    return;
  }
  ASSERT_HOST(!word->chopped_word->blobs.empty());
  recog_word_recursive(word);
  word->SetupBoxWord();
  if (word->best_choice->length() != word->box_word->length()) {
    tprintf("recog_word ASSERT FAIL String:\"%s\"; "
            "Strlen=%d; #Blobs=%d\n",
            word->best_choice->debug_string().string(),
            word->best_choice->length(), word->box_word->length());
  }
  ASSERT_HOST(word->best_choice->length() == word->box_word->length());
  // The ratings matrix size must match the sum of all segmentation states.
  if (!word->StatesAllValid()) {
    tprintf("Not all words have valid states relative to ratings matrix!!");
    word->DebugWordChoices(true, NULL);
    ASSERT_HOST(word->StatesAllValid());
  }
  if (tessedit_override_permuter) {
    /* Override the permuter type if a straight dictionary check disagrees. */
    uinT8 perm_type = word->best_choice->permuter();
    bool has_dict_perm = perm_type == SYSTEM_DAWG_PERM ||
                         perm_type == FREQ_DAWG_PERM ||
                         perm_type == USER_DAWG_PERM;
    if (!has_dict_perm) {
      uinT8 dict_perm = dict_word(*word->best_choice);
      bool dict_says_word = dict_perm == SYSTEM_DAWG_PERM ||
                            dict_perm == FREQ_DAWG_PERM ||
                            dict_perm == USER_DAWG_PERM;
      if (dict_says_word &&
          alpha_count(word->best_choice->unichar_string().string(),
                      word->best_choice->unichar_lengths().string()) > 0) {
        word->best_choice->set_permuter(dict_perm);  // use dict perm
      }
    }
    if (tessedit_rejection_debug &&
        perm_type != word->best_choice->permuter()) {
      tprintf("Permuter Type Flipped from %d to %d\n",
              perm_type, word->best_choice->permuter());
    }
  }
  // Factored out from control.cpp: a missing, empty or all-space best
  // choice is treated as a tess failure and the whole word is rejected.
  ASSERT_HOST((word->best_choice == NULL) == (word->raw_choice == NULL));
  if (word->best_choice == NULL || word->best_choice->length() == 0 ||
      static_cast<int>(strspn(word->best_choice->unichar_string().string(),
                              " ")) == word->best_choice->length()) {
    word->tess_failed = true;
    word->reject_map.initialise(word->box_word->length());
    word->reject_map.rej_word_tess_failure();
  } else {
    word->tess_failed = false;
  }
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* recog_word_recursive
|
||||
*
|
||||
* Convert the word to tess form and pass it to the tess segmenter.
|
||||
* Convert the output back to editor form.
|
||||
**********************************************************************/
|
||||
// Recognizes a word, recursively splitting words longer than
// MAX_UNDIVIDED_LENGTH blobs, then sanity-checks best_choice against the
// number of output blobs: over-long strings are discarded, short ones are
// padded with spaces.
void Tesseract::recog_word_recursive(WERD_RES *word) {
  int num_blobs = word->chopped_word->NumBlobs();
  if (num_blobs > MAX_UNDIVIDED_LENGTH) {
    // Too long for a single pass: split at the widest gap and recurse.
    return split_and_recog_word(word);
  }
  cc_recog(word);
  num_blobs = word->rebuild_word->NumBlobs();  // blobs in the output word

  // Do sanity checks and minor fixes on best_choice.
  if (word->best_choice->length() > num_blobs) {
    word->best_choice->make_bad();  // should never happen
    tprintf("recog_word: Discarded long string \"%s\""
            " (%d characters vs %d blobs)\n",
            word->best_choice->unichar_string().string(),
            word->best_choice->length(), num_blobs);
    tprintf("Word is at:");
    word->word->bounding_box().print();
  }
  if (word->best_choice->length() < num_blobs) {
    // Pad the choice with spaces until one unichar per blob.
    UNICHAR_ID space_id = unicharset.unichar_to_id(" ");
    while (word->best_choice->length() < num_blobs) {
      word->best_choice->append_unichar_id(space_id, 1, 0.0,
                                           word->best_choice->certainty());
    }
  }
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* split_and_recog_word
|
||||
*
|
||||
* Split the word into 2 smaller pieces at the largest gap.
|
||||
* Recognize the pieces and stick the results back together.
|
||||
**********************************************************************/
|
||||
// Splits the word into two smaller pieces at the widest horizontal blob
// gap, recognizes each piece recursively, and stitches the results back
// together with join_words().
void Tesseract::split_and_recog_word(WERD_RES *word) {
  // Find the biggest blob gap in the chopped_word.
  int widest_gap = -MAX_INT32;
  int split_index = 0;
  for (int b = 1; b < word->chopped_word->NumBlobs(); ++b) {
    TBOX prev_box = word->chopped_word->blobs[b - 1]->bounding_box();
    TBOX curr_box = word->chopped_word->blobs[b]->bounding_box();
    int gap = curr_box.left() - prev_box.right();
    if (gap > widest_gap) {
      widest_gap = gap;
      split_index = b;
    }
  }
  ASSERT_HOST(split_index > 0);

  WERD_RES *right_word = NULL;
  BlamerBundle *saved_blame = NULL;
  split_word(word, split_index, &right_word, &saved_blame);

  recog_word_recursive(word);        // left piece
  recog_word_recursive(right_word);  // right piece

  join_words(word, right_word, saved_blame);
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* split_word
|
||||
*
|
||||
* Split a given WERD_RES in place into two smaller words for recognition.
|
||||
* split_pt is the index of the first blob to go in the second word.
|
||||
* The underlying word is left alone, only the TWERD (and subsequent data)
|
||||
* are split up. orig_blamer_bundle is set to the original blamer bundle,
|
||||
* and will now be owned by the caller. New blamer bundles are forged for the
|
||||
* two pieces.
|
||||
**********************************************************************/
|
||||
// Splits the given WERD_RES in place into two smaller words for
// recognition. split_pt is the index of the first blob that goes into the
// second word. The underlying WERD is left alone; only the TWERD (and
// derived data) is divided. *orig_blamer_bundle receives the original
// blamer bundle, which the caller now owns; fresh bundles are forged for
// the two pieces.
void Tesseract::split_word(WERD_RES *word,
                           int split_pt,
                           WERD_RES **right_piece,
                           BlamerBundle **orig_blamer_bundle) const {
  ASSERT_HOST(split_pt > 0 && split_pt < word->chopped_word->NumBlobs());

  // Keep a copy of the blamer bundle so it can be reconstructed later.
  BlamerBundle *saved_bb =
      word->blamer_bundle ? new BlamerBundle(*word->blamer_bundle) : NULL;

  WERD_RES *right_word = new WERD_RES(*word);

  // Discard the copied chopped_word: the pieces must share blobs with the
  // input chopped_word so the seam arrays can be merged again later.
  TWERD *left_chopped = word->chopped_word;
  TWERD *right_chopped = new TWERD;
  right_chopped->blobs.reserve(left_chopped->NumBlobs() - split_pt);
  for (int b = split_pt; b < left_chopped->NumBlobs(); ++b) {
    right_chopped->blobs.push_back(left_chopped->blobs[b]);
  }
  left_chopped->blobs.truncate(split_pt);
  word->chopped_word = NULL;
  delete right_word->chopped_word;
  right_word->chopped_word = NULL;

  const UNICHARSET &unicharset = *word->uch_set;
  // ClearResults() wipes derived data; reinstall the chopped words and
  // rebuild the basics from them.
  word->ClearResults();
  right_word->ClearResults();
  word->chopped_word = left_chopped;
  right_word->chopped_word = right_chopped;
  word->SetupBasicsFromChoppedWord(unicharset);
  right_word->SetupBasicsFromChoppedWord(unicharset);

  // Try to adjust the blamer bundle.
  if (saved_bb != NULL) {
    // TODO(rays) Looks like a leak to me.
    // orig_bb should take, rather than copy.
    word->blamer_bundle = new BlamerBundle();
    right_word->blamer_bundle = new BlamerBundle();
    saved_bb->SplitBundle(
        left_chopped->blobs.back()->bounding_box().right(),
        right_word->chopped_word->blobs[0]->bounding_box().left(),
        wordrec_debug_blamer,
        word->blamer_bundle, right_word->blamer_bundle);
  }

  *right_piece = right_word;
  *orig_blamer_bundle = saved_bb;
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* join_words
|
||||
*
|
||||
* The opposite of split_word():
|
||||
* join word2 (including any recognized data / seam array / etc)
|
||||
* onto the right of word and then delete word2.
|
||||
* Also, if orig_bb is provided, stitch it back into word.
|
||||
**********************************************************************/
|
||||
// The opposite of split_word(): joins word2 (including any recognized
// data / seam array / etc.) onto the right of word and deletes word2.
// If orig_bb is provided, it is stitched back into word as the restored
// blamer bundle.
void Tesseract::join_words(WERD_RES *word,
                           WERD_RES *word2,
                           BlamerBundle *orig_bb) const {
  TBOX left_box = word->chopped_word->blobs.back()->bounding_box();
  TBOX right_box = word2->chopped_word->blobs[0]->bounding_box();
  // Tack the word2 outputs onto the end of the word outputs.
  word->chopped_word->blobs += word2->chopped_word->blobs;
  word->rebuild_word->blobs += word2->rebuild_word->blobs;
  word2->chopped_word->blobs.clear();
  word2->rebuild_word->blobs.clear();
  // Mid-point of the gap between the two pieces, used for the join seam.
  TPOINT split_pt;
  split_pt.x = (left_box.right() + right_box.left()) / 2;
  split_pt.y = (left_box.top() + left_box.bottom() +
                right_box.top() + right_box.bottom()) / 4;
  // Move the word2 seams onto the end of the word1 seam_array. A seam list
  // is one element shorter than the blob list, so an empty seam marking
  // the end of the last blob of the first word goes in first.
  word->seam_array.push_back(new SEAM(0.0f, split_pt));
  word->seam_array += word2->seam_array;
  word2->seam_array.truncate(0);
  // Merge widths and gaps.
  word->blob_widths += word2->blob_widths;
  word->blob_gaps += word2->blob_gaps;
  // Merge the ratings matrices diagonally.
  int dim1 = word->ratings->dimension();
  int dim2 = word2->ratings->dimension();
  word->ratings->AttachOnCorner(word2->ratings);
  ASSERT_HOST(word->ratings->dimension() == dim1 + dim2);
  word->best_state += word2->best_state;
  // Append the raw choices.
  *word->raw_choice += *word2->raw_choice;

  const int kAltsPerPiece = 2;         // alt choices to take from each piece
  const int kTooManyAltChoices = 100;  // cap before discarding extras

  // Build the cartesian product of the best_choices of word(1) and word2.
  WERD_CHOICE_LIST joined_choices;
  WERD_CHOICE_IT jc_it(&joined_choices);
  WERD_CHOICE_IT bc1_it(&word->best_choices);
  WERD_CHOICE_IT bc2_it(&word2->best_choices);
  int num_word1_choices = word->best_choices.length();
  int total_joined_choices = num_word1_choices;
  // Nota Bene: the main loop only handles the 2nd and later word2 choices,
  // collecting them into joined_choices. The 1st word2 choice is appended
  // to the original word1 choices in-place afterwards.
  int bc2_index = 1;
  for (bc2_it.forward(); !bc2_it.at_first(); bc2_it.forward(), ++bc2_index) {
    if (total_joined_choices >= kTooManyAltChoices &&
        bc2_index > kAltsPerPiece)
      break;
    int bc1_index = 0;
    for (bc1_it.move_to_first(); bc1_index < num_word1_choices;
         ++bc1_index, bc1_it.forward()) {
      if (total_joined_choices >= kTooManyAltChoices &&
          bc1_index > kAltsPerPiece)
        break;
      WERD_CHOICE *joined = new WERD_CHOICE(*bc1_it.data());
      *joined += *bc2_it.data();
      jc_it.add_after_then_move(joined);
      ++total_joined_choices;
    }
  }
  // Paste word2's best choice onto each original word1 alternative, then
  // append the collected product choices.
  bc1_it.move_to_first();
  bc2_it.move_to_first();
  for (bc1_it.mark_cycle_pt(); !bc1_it.cycled_list(); bc1_it.forward()) {
    *bc1_it.data() += *bc2_it.data();
  }
  bc1_it.move_to_last();
  bc1_it.add_list_after(&joined_choices);

  // Restore the pointer to the original blamer bundle and combine the
  // blamer information recorded in the two splits.
  if (orig_bb != NULL) {
    orig_bb->JoinBlames(*word->blamer_bundle, *word2->blamer_bundle,
                        wordrec_debug_blamer);
    delete word->blamer_bundle;
    word->blamer_bundle = orig_bb;
  }
  word->SetupBoxWord();
  word->reject_map.initialise(word->box_word->length());
  delete word2;
}
|
||||
|
||||
|
||||
} // namespace tesseract
|
|
@ -0,0 +1,334 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: thresholder.cpp
|
||||
// Description: Base API for thresholding images in tesseract.
|
||||
// Author: Ray Smith
|
||||
// Created: Mon May 12 11:28:15 PDT 2008
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "allheaders.h"
|
||||
|
||||
#include "thresholder.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "otsuthr.h"
|
||||
|
||||
#include "openclwrapper.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Constructs an empty thresholder: no image, zero dimensions, unit scale,
// and a default resolution of 300 dpi.
ImageThresholder::ImageThresholder()
    : pix_(NULL),
      image_width_(0),
      image_height_(0),
      pix_channels_(0),
      pix_wpl_(0),
      scale_(1),
      yres_(300),
      estimated_res_(300) {
  SetRectangle(0, 0, 0, 0);  // start with an empty processing rectangle
}
|
||||
|
||||
// Frees the held Pix, if any.
ImageThresholder::~ImageThresholder() {
  Clear();
}
|
||||
|
||||
// Destroy the Pix if there is one, freeing memory.
|
||||
// Destroys the Pix if there is one, freeing its memory.
void ImageThresholder::Clear() {
  pixDestroy(&pix_);
}
|
||||
|
||||
// Return true if no image has been set.
|
||||
bool ImageThresholder::IsEmpty() const {
|
||||
return pix_ == NULL;
|
||||
}
|
||||
|
||||
// SetImage makes a copy of all the image data, so it may be deleted
|
||||
// immediately after this call.
|
||||
// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
|
||||
// Palette color images will not work properly and must be converted to
|
||||
// 24 bit.
|
||||
// Binary images of 1 bit per pixel may also be given but they must be
|
||||
// byte packed with the MSB of the first byte being the first pixel, and a
|
||||
// one pixel is WHITE. For binary images set bytes_per_pixel=0.
|
||||
void ImageThresholder::SetImage(const unsigned char* imagedata,
|
||||
int width, int height,
|
||||
int bytes_per_pixel, int bytes_per_line) {
|
||||
int bpp = bytes_per_pixel * 8;
|
||||
if (bpp == 0) bpp = 1;
|
||||
Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
|
||||
l_uint32* data = pixGetData(pix);
|
||||
int wpl = pixGetWpl(pix);
|
||||
switch (bpp) {
|
||||
case 1:
|
||||
for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
if (imagedata[x / 8] & (0x80 >> (x % 8)))
|
||||
CLEAR_DATA_BIT(data, x);
|
||||
else
|
||||
SET_DATA_BIT(data, x);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 8:
|
||||
// Greyscale just copies the bytes in the right order.
|
||||
for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
|
||||
for (int x = 0; x < width; ++x)
|
||||
SET_DATA_BYTE(data, x, imagedata[x]);
|
||||
}
|
||||
break;
|
||||
|
||||
case 24:
|
||||
// Put the colors in the correct places in the line buffer.
|
||||
for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
|
||||
for (int x = 0; x < width; ++x, ++data) {
|
||||
SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
|
||||
SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
|
||||
SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 32:
|
||||
// Maintain byte order consistency across different endianness.
|
||||
for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
|
||||
(imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
|
||||
}
|
||||
pixSetYRes(pix, 300);
|
||||
SetImage(pix);
|
||||
pixDestroy(&pix);
|
||||
}
|
||||
|
||||
// Store the coordinates of the rectangle to process for later use.
|
||||
// Doesn't actually do any thresholding.
|
||||
// Stores the coordinates of the rectangle to process for later use.
// Does not perform any thresholding itself.
void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
  rect_left_ = left;
  rect_top_ = top;
  rect_width_ = width;
  rect_height_ = height;
}
|
||||
|
||||
// Get enough parameters to be able to rebuild bounding boxes in the
|
||||
// original image (not just within the rectangle).
|
||||
// Left and top are enough with top-down coordinates, but
|
||||
// the height of the rectangle and the image are needed for bottom-up.
|
||||
void ImageThresholder::GetImageSizes(int* left, int* top,
|
||||
int* width, int* height,
|
||||
int* imagewidth, int* imageheight) {
|
||||
*left = rect_left_;
|
||||
*top = rect_top_;
|
||||
*width = rect_width_;
|
||||
*height = rect_height_;
|
||||
*imagewidth = image_width_;
|
||||
*imageheight = image_height_;
|
||||
}
|
||||
|
||||
// Pix vs raw, which to use? Pix is the preferred input for efficiency,
// since raw buffers are copied.
// SetImage for Pix clones its input, so the source pix may be pixDestroyed
// immediately after, but may not go away until after the Thresholder has
// finished with it.
void ImageThresholder::SetImage(const Pix* pix) {
  // Release any previously held image first.
  if (pix_ != NULL)
    pixDestroy(&pix_);
  Pix* src = const_cast<Pix*>(pix);
  int depth;
  pixGetDimensions(src, &image_width_, &image_height_, &depth);
  // Convert the image as necessary so it is one of binary, plain RGB, or
  // 8 bit with no colormap. Guarantee that we always end up with our own copy,
  // not just a clone of the input.
  if (pixGetColormap(src)) {
    // Colormapped input: strip the colormap, then fix up the depth if the
    // result landed between binary and 8-bit grey.
    Pix* tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
    depth = pixGetDepth(tmp);
    if (depth > 1 && depth < 8) {
      pix_ = pixConvertTo8(tmp, false);
      pixDestroy(&tmp);
    }
    else {
      pix_ = tmp;
    }
  }
  else if (depth > 1 && depth < 8) {
    // 2/4-bit grey: promote to 8-bit.
    pix_ = pixConvertTo8(src, false);
  }
  else {
    // Already binary, 8-bit, or RGB: take a private copy.
    pix_ = pixCopy(NULL, src);
  }
  depth = pixGetDepth(pix_);
  pix_channels_ = depth / 8;  // 0 for binary input (depth 1).
  pix_wpl_ = pixGetWpl(pix_);
  scale_ = 1;
  // Until told otherwise, the estimated resolution equals the source yres.
  estimated_res_ = yres_ = pixGetYRes(pix_);
  Init();
}
|
||||
|
||||
// Threshold the source image as efficiently as possible to the output Pix.
|
||||
// Creates a Pix and sets pix to point to the resulting pointer.
|
||||
// Caller must use pixDestroy to free the created Pix.
|
||||
void ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
|
||||
if (pix_channels_ == 0) {
|
||||
// We have a binary image, but it still has to be copied, as this API
|
||||
// allows the caller to modify the output.
|
||||
Pix* original = GetPixRect();
|
||||
*pix = pixCopy(NULL, original);
|
||||
pixDestroy(&original);
|
||||
}
|
||||
else {
|
||||
OtsuThresholdRectToPix(pix_, pix);
|
||||
}
|
||||
}
|
||||
|
||||
// Gets a pix that contains an 8 bit threshold value at each pixel. The
|
||||
// returned pix may be an integer reduction of the binary image such that
|
||||
// the scale factor may be inferred from the ratio of the sizes, even down
|
||||
// to the extreme of a 1x1 pixel thresholds image.
|
||||
// Ideally the 8 bit threshold should be the exact threshold used to generate
|
||||
// the binary image in ThresholdToPix, but this is not a hard constraint.
|
||||
// Returns NULL if the input is binary. PixDestroy after use.
|
||||
Pix* ImageThresholder::GetPixRectThresholds() {
|
||||
if (IsBinary()) return NULL;
|
||||
Pix* pix_grey = GetPixRectGrey();
|
||||
int width = pixGetWidth(pix_grey);
|
||||
int height = pixGetHeight(pix_grey);
|
||||
int* thresholds;
|
||||
int* hi_values;
|
||||
OtsuThreshold(pix_grey, 0, 0, width, height, &thresholds, &hi_values);
|
||||
pixDestroy(&pix_grey);
|
||||
Pix* pix_thresholds = pixCreate(width, height, 8);
|
||||
int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
|
||||
pixSetAllArbitrary(pix_thresholds, threshold);
|
||||
delete[] thresholds;
|
||||
delete[] hi_values;
|
||||
return pix_thresholds;
|
||||
}
|
||||
|
||||
// Common initialization shared between SetImage methods.
|
||||
void ImageThresholder::Init() {
|
||||
SetRectangle(0, 0, image_width_, image_height_);
|
||||
}
|
||||
|
||||
// Get a clone/copy of the source image rectangle.
|
||||
// The returned Pix must be pixDestroyed.
|
||||
// This function will be used in the future by the page layout analysis, and
|
||||
// the layout analysis that uses it will only be available with Leptonica,
|
||||
// so there is no raw equivalent.
|
||||
Pix* ImageThresholder::GetPixRect() {
|
||||
if (IsFullImage()) {
|
||||
// Just clone the whole thing.
|
||||
return pixClone(pix_);
|
||||
}
|
||||
else {
|
||||
// Crop to the given rectangle.
|
||||
Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
|
||||
Pix* cropped = pixClipRectangle(pix_, box, NULL);
|
||||
boxDestroy(&box);
|
||||
return cropped;
|
||||
}
|
||||
}
|
||||
|
||||
// Get a clone/copy of the source image rectangle, reduced to greyscale,
|
||||
// and at the same resolution as the output binary.
|
||||
// The returned Pix must be pixDestroyed.
|
||||
// Provided to the classifier to extract features from the greyscale image.
|
||||
Pix* ImageThresholder::GetPixRectGrey() {
|
||||
Pix* pix = GetPixRect(); // May have to be reduced to grey.
|
||||
int depth = pixGetDepth(pix);
|
||||
if (depth != 8) {
|
||||
Pix* result = depth < 8 ? pixConvertTo8(pix, false)
|
||||
: pixConvertRGBToLuminance(pix);
|
||||
pixDestroy(&pix);
|
||||
return result;
|
||||
}
|
||||
return pix;
|
||||
}
|
||||
|
||||
// Otsu thresholds the rectangle, taking the rectangle from *this.
// Computes per-channel thresholds/hi_values with OtsuThreshold, then applies
// them either on an OpenCL device (when compiled in and selected) or on the
// CPU via ThresholdRectToPix. The binary result is stored in *out_pix.
void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix,
                                              Pix** out_pix) const {
  PERF_COUNT_START("OtsuThresholdRectToPix")
  int* thresholds;
  int* hi_values;

  int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_,
                                   rect_height_, &thresholds, &hi_values);
  // only use opencl if compiled w/ OpenCL and selected device is opencl
#ifdef USE_OPENCL
  OpenclDevice od;
  // The OCL path handles only 1- or 4-channel data whose rectangle starts at
  // the image origin; anything else falls through to the CPU path.
  if ((num_channels == 4 || num_channels == 1) &&
      od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0) {
    od.ThresholdRectToPixOCL((unsigned char*)pixGetData(src_pix), num_channels,
                             pixGetWpl(src_pix) * 4, thresholds, hi_values,
                             out_pix /*pix_OCL*/, rect_height_, rect_width_,
                             rect_top_, rect_left_);
  }
  else {
#endif
    ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
#ifdef USE_OPENCL
  }
#endif
  // OtsuThreshold allocated these arrays with new[].
  delete[] thresholds;
  delete[] hi_values;

  PERF_COUNT_END
}
|
||||
|
||||
/// Threshold the rectangle, taking everything except the src_pix
/// from the class, using thresholds/hi_values to the output pix.
/// NOTE that num_channels is the size of the thresholds and hi_values
/// arrays and also the bytes per pixel in src_pix.
void ImageThresholder::ThresholdRectToPix(Pix* src_pix,
                                          int num_channels,
                                          const int* thresholds,
                                          const int* hi_values,
                                          Pix** pix) const {
  PERF_COUNT_START("ThresholdRectToPix")
  // 1 bpp output: clear bit = white, set bit = black (see loop below).
  *pix = pixCreate(rect_width_, rect_height_, 1);
  uinT32* pixdata = pixGetData(*pix);
  int wpl = pixGetWpl(*pix);
  int src_wpl = pixGetWpl(src_pix);
  uinT32* srcdata = pixGetData(src_pix);
  for (int y = 0; y < rect_height_; ++y) {
    const uinT32* linedata = srcdata + (y + rect_top_) * src_wpl;
    uinT32* pixline = pixdata + y * wpl;
    for (int x = 0; x < rect_width_; ++x) {
      bool white_result = true;
      // A pixel is white only if every considered channel is on the white
      // side of its threshold; hi_values[ch] < 0 means ignore that channel,
      // and hi_values[ch] == 0 inverts the comparison sense.
      for (int ch = 0; ch < num_channels; ++ch) {
        int pixel = GET_DATA_BYTE(const_cast<void*>(
                                  reinterpret_cast<const void *>(linedata)),
                                  (x + rect_left_) * num_channels + ch);
        if (hi_values[ch] >= 0 &&
            (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
          white_result = false;
          break;
        }
      }
      if (white_result)
        CLEAR_DATA_BIT(pixline, x);
      else
        SET_DATA_BIT(pixline, x);
    }
  }

  PERF_COUNT_END
}
|
||||
|
||||
} // namespace tesseract.
|
||||
|
|
@ -0,0 +1,189 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: thresholder.h
|
||||
// Description: Base API for thresolding images in tesseract.
|
||||
// Author: Ray Smith
|
||||
// Created: Mon May 12 11:00:15 PDT 2008
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCMAIN_THRESHOLDER_H__
|
||||
#define TESSERACT_CCMAIN_THRESHOLDER_H__
|
||||
|
||||
#include "platform.h"
|
||||
#include "publictypes.h"
|
||||
|
||||
struct Pix;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/// Base class for all tesseract image thresholding classes.
/// Specific classes can add new thresholding methods by
/// overriding ThresholdToPix.
/// Each instance deals with a single image, but the design is intended to
/// be useful for multiple calls to SetRectangle and ThresholdTo* if
/// desired.
class TESS_API ImageThresholder {
 public:
  ImageThresholder();
  virtual ~ImageThresholder();

  /// Destroy the Pix if there is one, freeing memory.
  virtual void Clear();

  /// Return true if no image has been set.
  bool IsEmpty() const;

  /// SetImage makes a copy of all the image data, so it may be deleted
  /// immediately after this call.
  /// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
  /// Palette color images will not work properly and must be converted to
  /// 24 bit.
  /// Binary images of 1 bit per pixel may also be given but they must be
  /// byte packed with the MSB of the first byte being the first pixel, and a
  /// one pixel is WHITE. For binary images set bytes_per_pixel=0.
  void SetImage(const unsigned char* imagedata, int width, int height,
                int bytes_per_pixel, int bytes_per_line);

  /// Store the coordinates of the rectangle to process for later use.
  /// Doesn't actually do any thresholding.
  void SetRectangle(int left, int top, int width, int height);

  /// Get enough parameters to be able to rebuild bounding boxes in the
  /// original image (not just within the rectangle).
  /// Left and top are enough with top-down coordinates, but
  /// the height of the rectangle and the image are needed for bottom-up.
  virtual void GetImageSizes(int* left, int* top, int* width, int* height,
                             int* imagewidth, int* imageheight);

  /// Return true if the source image is color.
  bool IsColor() const {
    return pix_channels_ >= 3;
  }

  /// Returns true if the source image is binary.
  bool IsBinary() const {
    return pix_channels_ == 0;
  }

  /// Returns the scale factor from the original image.
  int GetScaleFactor() const {
    return scale_;
  }

  // Set the resolution of the source image in pixels per inch.
  // This should be called right after SetImage(), and will let us return
  // appropriate font sizes for the text.
  void SetSourceYResolution(int ppi) {
    yres_ = ppi;
    estimated_res_ = ppi;
  }
  /// Returns the source image resolution in pixels per inch.
  int GetSourceYResolution() const {
    return yres_;
  }
  /// Returns the source resolution multiplied by the scale factor.
  int GetScaledYResolution() const {
    return scale_ * yres_;
  }
  // Set the resolution of the source image in pixels per inch, as estimated
  // by the thresholder from the text size found during thresholding.
  // This value will be used to set internal size thresholds during recognition
  // and will not influence the output "point size." The default value is
  // the same as the source resolution. (yres_)
  void SetEstimatedResolution(int ppi) {
    estimated_res_ = ppi;
  }
  // Returns the estimated resolution, including any active scaling.
  // This value will be used to set internal size thresholds during recognition.
  int GetScaledEstimatedResolution() const {
    return scale_ * estimated_res_;
  }

  /// Pix vs raw, which to use? Pix is the preferred input for efficiency,
  /// since raw buffers are copied.
  /// SetImage for Pix clones its input, so the source pix may be pixDestroyed
  /// immediately after, but may not go away until after the Thresholder has
  /// finished with it.
  void SetImage(const Pix* pix);

  /// Threshold the source image as efficiently as possible to the output Pix.
  /// Creates a Pix and sets pix to point to the resulting pointer.
  /// Caller must use pixDestroy to free the created Pix.
  virtual void ThresholdToPix(PageSegMode pageseg_mode, Pix** pix);

  // Gets a pix that contains an 8 bit threshold value at each pixel. The
  // returned pix may be an integer reduction of the binary image such that
  // the scale factor may be inferred from the ratio of the sizes, even down
  // to the extreme of a 1x1 pixel thresholds image.
  // Ideally the 8 bit threshold should be the exact threshold used to generate
  // the binary image in ThresholdToPix, but this is not a hard constraint.
  // Returns NULL if the input is binary. PixDestroy after use.
  virtual Pix* GetPixRectThresholds();

  /// Get a clone/copy of the source image rectangle.
  /// The returned Pix must be pixDestroyed.
  /// This function will be used in the future by the page layout analysis, and
  /// the layout analysis that uses it will only be available with Leptonica,
  /// so there is no raw equivalent.
  Pix* GetPixRect();

  // Get a clone/copy of the source image rectangle, reduced to greyscale,
  // and at the same resolution as the output binary.
  // The returned Pix must be pixDestroyed.
  // Provided to the classifier to extract features from the greyscale image.
  virtual Pix* GetPixRectGrey();

 protected:
  // ----------------------------------------------------------------------
  // Utility functions that may be useful components for other thresholders.

  /// Common initialization shared between SetImage methods.
  virtual void Init();

  /// Return true if we are processing the full image.
  bool IsFullImage() const {
    return rect_left_ == 0 && rect_top_ == 0 &&
           rect_width_ == image_width_ && rect_height_ == image_height_;
  }

  // Otsu thresholds the rectangle, taking the rectangle from *this.
  void OtsuThresholdRectToPix(Pix* src_pix, Pix** out_pix) const;

  /// Threshold the rectangle, taking everything except the src_pix
  /// from the class, using thresholds/hi_values to the output pix.
  /// NOTE that num_channels is the size of the thresholds and hi_values
  /// arrays and also the bytes per pixel in src_pix.
  void ThresholdRectToPix(Pix* src_pix, int num_channels,
                          const int* thresholds, const int* hi_values,
                          Pix** pix) const;

 protected:
  /// Clone or other copy of the source Pix.
  /// The pix will always be PixDestroy()ed on destruction of the class.
  Pix* pix_;

  int image_width_;    ///< Width of source pix_.
  int image_height_;   ///< Height of source pix_.
  int pix_channels_;   ///< Number of 8-bit channels in pix_ (0 when binary).
  int pix_wpl_;        ///< Words per line of pix_.
  // Limits of image rectangle to be processed.
  int scale_;          ///< Scale factor from original image.
  int yres_;           ///< y pixels/inch in source image.
  int estimated_res_;  ///< Resolution estimate from text size.
  int rect_left_;      ///< Left edge of the rectangle to process.
  int rect_top_;       ///< Top edge of the rectangle to process.
  int rect_width_;     ///< Width of the rectangle to process.
  int rect_height_;    ///< Height of the rectangle to process.
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCMAIN_THRESHOLDER_H__
|
|
@ -0,0 +1,2 @@
|
|||
#define GIT_REV "3.05.00dev"
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
/**********************************************************************
|
||||
* File: werdit.cpp (Formerly wordit.c)
|
||||
* Description: An iterator for passing over all the words in a document.
|
||||
* Author: Ray Smith
|
||||
* Created: Mon Apr 27 08:51:22 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "werdit.h"
|
||||
|
||||
/**********************************************************************
 * make_pseudo_word
 *
 * Make all the blobs inside a selection into a single word.
 * The returned PAGE_RES_IT* it points to the new word. After use, call
 * it->DeleteCurrentWord() to delete the fake word, and then
 * delete it to get rid of the iterator itself.
 * Returns NULL if no blob overlaps the selection box.
 **********************************************************************/

PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box) {
  PAGE_RES_IT pr_it(page_res);
  C_BLOB_LIST new_blobs;               // list of gathered blobs
  C_BLOB_IT new_blob_it = &new_blobs;  // iterator

  // Scan word by word; within each overlapping word, deep-copy every blob
  // that itself overlaps the selection.
  for (WERD_RES* word_res = pr_it.word(); word_res != NULL;
       word_res = pr_it.forward()) {
    WERD* word = word_res->word;
    if (word->bounding_box().overlap(selection_box)) {
      C_BLOB_IT blob_it(word->cblob_list());
      for (blob_it.mark_cycle_pt();
           !blob_it.cycled_list(); blob_it.forward()) {
        C_BLOB* blob = blob_it.data();
        if (blob->bounding_box().overlap(selection_box)) {
          new_blob_it.add_after_then_move(C_BLOB::deep_copy(blob));
        }
      }
      // As soon as any blobs were collected, build the pseudo word right
      // after the current word and return a fresh iterator located at it.
      if (!new_blobs.empty()) {
        WERD* pseudo_word = new WERD(&new_blobs, 1, NULL);
        word_res = pr_it.InsertSimpleCloneWord(*word_res, pseudo_word);
        PAGE_RES_IT* it = new PAGE_RES_IT(page_res);
        // Walk the new iterator forward until it sits on the inserted word.
        while (it->word() != word_res && it->word() != NULL) it->forward();
        ASSERT_HOST(it->word() == word_res);
        return it;
      }
    }
  }
  // Nothing overlapped the selection.
  return NULL;
}
|
|
@ -0,0 +1,27 @@
|
|||
/**********************************************************************
|
||||
* File: wordit.c
|
||||
* Description: An iterator for passing over all the words in a document.
|
||||
* Author: Ray Smith
|
||||
* Created: Mon Apr 27 08:51:22 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef WERDIT_H
#define WERDIT_H

#include "pageres.h"

// Makes all blobs inside selection_box into a single pseudo word and returns
// a newly allocated PAGE_RES_IT positioned at it (NULL if nothing overlaps).
// After use, call DeleteCurrentWord() on the iterator, then delete it.
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box);

#endif
|
|
@ -0,0 +1,603 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: blamer.cpp
|
||||
// Description: Module allowing precise error causes to be allocated.
|
||||
// Author: Rike Antonova
|
||||
// Refactored: Ray Smith
|
||||
// Created: Mon Feb 04 14:37:01 PST 2013
|
||||
//
|
||||
// (C) Copyright 2013, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "blamer.h"
|
||||
#include "blobs.h"
|
||||
#include "matrix.h"
|
||||
#include "normalis.h"
|
||||
#include "pageres.h"
|
||||
|
||||
// Names for each value of IncorrectResultReason enum. Keep in sync.
const char kBlameCorrect[] = "corr";          // Answer was correct.
const char kBlameClassifier[] = "cl";         // Character classifier at fault.
const char kBlameChopper[] = "chop";          // Chopper at fault.
const char kBlameClassLMTradeoff[] = "cl/LM"; // Classifier/LM tradeoff.
const char kBlamePageLayout[] = "pglt";       // Page layout analysis.
const char kBlameSegsearchHeur[] = "ss_heur"; // Segmentation search heuristic.
const char kBlameSegsearchPP[] = "ss_pp";     // Segmentation search post-proc.
const char kBlameClassOldLMTradeoff[] = "cl/old_LM";
const char kBlameAdaption[] = "adapt";        // Adapted templates at fault.
const char kBlameNoTruthSplit[] = "no_tr_spl";// Truth could not be split.
const char kBlameNoTruth[] = "no_tr";         // No truth available.
const char kBlameUnknown[] = "unkn";          // Cause undetermined.

// Lookup table indexed by IncorrectResultReason; order must match the enum.
const char * const kIncorrectResultReasonNames[] = {
    kBlameCorrect,
    kBlameClassifier,
    kBlameChopper,
    kBlameClassLMTradeoff,
    kBlamePageLayout,
    kBlameSegsearchHeur,
    kBlameSegsearchPP,
    kBlameClassOldLMTradeoff,
    kBlameAdaption,
    kBlameNoTruthSplit,
    kBlameNoTruth,
    kBlameUnknown
};
|
||||
|
||||
// Maps an IncorrectResultReason value to its short printable name.
const char *BlamerBundle::IncorrectReasonName(IncorrectResultReason irr) {
  return kIncorrectResultReasonNames[irr];
}
|
||||
|
||||
const char *BlamerBundle::IncorrectReason() const {
|
||||
return kIncorrectResultReasonNames[incorrect_result_reason_];
|
||||
}
|
||||
|
||||
// Functions to setup the blamer.
// Whole word string, whole word bounding box.
// Stores a single box for the whole word (so no per-character boxes are
// available) and breaks truth_str into per-unichar pieces in truth_text_.
void BlamerBundle::SetWordTruth(const UNICHARSET& unicharset,
                                const char* truth_str, const TBOX& word_box) {
  truth_word_.InsertBox(0, word_box);
  // Only the whole-word box is known here.
  truth_has_char_boxes_ = false;
  // Encode the string as UNICHAR_IDs.
  GenericVector<UNICHAR_ID> encoding;
  GenericVector<char> lengths;
  unicharset.encode_string(truth_str, false, &encoding, &lengths, NULL);
  int total_length = 0;
  for (int i = 0; i < encoding.size(); total_length += lengths[i++]) {
    // Extract the raw bytes for this unichar from the truth string.
    // NOTE(review): the truncation length subtracts total_length — this
    // mirrors upstream; verify against encode_string's lengths semantics.
    STRING uch(truth_str + total_length);
    uch.truncate_at(lengths[i] - total_length);
    // Prefer the normalized form when the unichar id is valid.
    UNICHAR_ID id = encoding[i];
    if (id != INVALID_UNICHAR_ID) uch = unicharset.get_normed_unichar(id);
    truth_text_.push_back(uch);
  }
}
|
||||
|
||||
// Single "character" string, "character" bounding box.
// May be called multiple times to indicate the characters in a word.
void BlamerBundle::SetSymbolTruth(const UNICHARSET& unicharset,
                                  const char* char_str, const TBOX& char_box) {
  STRING symbol_str(char_str);
  // Prefer the normalized version of the unichar when it is known.
  UNICHAR_ID id = unicharset.unichar_to_id(char_str);
  if (id != INVALID_UNICHAR_ID) {
    STRING normed_uch(unicharset.get_normed_unichar(id));
    if (normed_uch.length() > 0) symbol_str = normed_uch;
  }
  int length = truth_word_.length();
  truth_text_.push_back(symbol_str);
  truth_word_.InsertBox(length, char_box);
  // Char boxes stay usable only while each symbol brings a distinct box;
  // a box identical to the previous one means the caller has no real
  // per-character geometry.
  if (length == 0)
    truth_has_char_boxes_ = true;
  else if (truth_word_.BlobBox(length - 1) == char_box)
    truth_has_char_boxes_ = false;
}
|
||||
|
||||
// Marks that there is something wrong with the truth text, like it contains
|
||||
// reject characters.
|
||||
void BlamerBundle::SetRejectedTruth() {
|
||||
incorrect_result_reason_ = IRR_NO_TRUTH;
|
||||
truth_has_char_boxes_ = false;
|
||||
}
|
||||
|
||||
// Returns true if the provided word_choice is correct.
|
||||
bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE* word_choice) const {
|
||||
if (word_choice == NULL) return false;
|
||||
const UNICHARSET* uni_set = word_choice->unicharset();
|
||||
STRING normed_choice_str;
|
||||
for (int i = 0; i < word_choice->length(); ++i) {
|
||||
normed_choice_str +=
|
||||
uni_set->get_normed_unichar(word_choice->unichar_id(i));
|
||||
}
|
||||
STRING truth_str = TruthString();
|
||||
return truth_str == normed_choice_str;
|
||||
}
|
||||
|
||||
void BlamerBundle::FillDebugString(const STRING &msg,
|
||||
const WERD_CHOICE *choice,
|
||||
STRING *debug) {
|
||||
(*debug) += "Truth ";
|
||||
for (int i = 0; i < this->truth_text_.length(); ++i) {
|
||||
(*debug) += this->truth_text_[i];
|
||||
}
|
||||
if (!this->truth_has_char_boxes_) (*debug) += " (no char boxes)";
|
||||
if (choice != NULL) {
|
||||
(*debug) += " Choice ";
|
||||
STRING choice_str;
|
||||
choice->string_and_lengths(&choice_str, NULL);
|
||||
(*debug) += choice_str;
|
||||
}
|
||||
if (msg.length() > 0) {
|
||||
(*debug) += "\n";
|
||||
(*debug) += msg;
|
||||
}
|
||||
(*debug) += "\n";
|
||||
}
|
||||
|
||||
// Sets up the norm_truth_word from truth_word using the given DENORM:
// each truth box's corners are pushed through the normalization transform.
void BlamerBundle::SetupNormTruthWord(const DENORM& denorm) {
  // TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
  // Box-matching tolerance scales with the normalization's x scale.
  norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
  TPOINT topleft;
  TPOINT botright;
  TPOINT norm_topleft;
  TPOINT norm_botright;
  for (int b = 0; b < truth_word_.length(); ++b) {
    const TBOX &box = truth_word_.BlobBox(b);
    topleft.x = box.left();
    topleft.y = box.top();
    botright.x = box.right();
    botright.y = box.bottom();
    // Transform both corners into normalized space.
    denorm.NormTransform(NULL, topleft, &norm_topleft);
    denorm.NormTransform(NULL, botright, &norm_botright);
    TBOX norm_box(norm_topleft.x, norm_botright.y,
                  norm_botright.x, norm_topleft.y);
    norm_truth_word_.InsertBox(b, norm_box);
  }
}
|
||||
|
||||
// Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
// bundles) where the right edge of the left-hand word is word1_right,
// and the left edge of the right-hand word is word2_left.
void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug,
                               BlamerBundle* bundle1,
                               BlamerBundle* bundle2) const {
  STRING debug_str;
  // Find truth boxes that correspond to the split in the blobs.
  int b;
  // Index of the first truth box belonging to the right-hand word,
  // or -1 if no matching split point is found.
  int begin2_truth_index = -1;
  if (incorrect_result_reason_ != IRR_NO_TRUTH &&
      truth_has_char_boxes_) {
    debug_str = "Looking for truth split at";
    debug_str.add_str_int(" end1_x ", word1_right);
    debug_str.add_str_int(" begin2_x ", word2_left);
    debug_str += "\nnorm_truth_word boxes:\n";
    if (norm_truth_word_.length() > 1) {
      norm_truth_word_.BlobBox(0).print_to_str(&debug_str);
      for (b = 1; b < norm_truth_word_.length(); ++b) {
        norm_truth_word_.BlobBox(b).print_to_str(&debug_str);
        // A split is found where box b-1's right edge matches word1_right
        // and box b's left edge matches word2_left, within tolerance.
        if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) <
            norm_box_tolerance_) &&
            (abs(word2_left - norm_truth_word_.BlobBox(b).left()) <
            norm_box_tolerance_)) {
          begin2_truth_index = b;
          debug_str += "Split found";
          break;
        }
      }
      debug_str += '\n';
    }
  }
  // Populate truth information in word and word2 with the first and second
  // part of the original truth.
  if (begin2_truth_index > 0) {
    bundle1->truth_has_char_boxes_ = true;
    bundle1->norm_box_tolerance_ = norm_box_tolerance_;
    bundle2->truth_has_char_boxes_ = true;
    bundle2->norm_box_tolerance_ = norm_box_tolerance_;
    // Copy boxes/text into bundle1 until the split index, then bundle2.
    BlamerBundle *curr_bb = bundle1;
    for (b = 0; b < norm_truth_word_.length(); ++b) {
      if (b == begin2_truth_index) curr_bb = bundle2;
      curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
      curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
      curr_bb->truth_text_.push_back(truth_text_[b]);
    }
  }
  else if (incorrect_result_reason_ == IRR_NO_TRUTH) {
    // No truth to split: both halves inherit the no-truth state.
    bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
    bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
  }
  else {
    // Truth exists but no split point matched: blame the missing split.
    debug_str += "Truth split not found";
    debug_str += truth_has_char_boxes_ ?
        "\n" : " (no truth char boxes)\n";
    bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, NULL, debug);
    bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, NULL, debug);
  }
}
|
||||
|
||||
// "Joins" the blames from bundle1 and bundle2 into *this.
void BlamerBundle::JoinBlames(const BlamerBundle& bundle1,
                              const BlamerBundle& bundle2, bool debug) {
  STRING debug_str;
  IncorrectResultReason irr = incorrect_result_reason_;
  if (irr != IRR_NO_TRUTH_SPLIT) debug_str = "";
  // Adopt bundle1's blame if it carries a substantive reason
  // (anything other than correct / no-truth / no-truth-split).
  if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
      bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
      bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
    debug_str += "Blame from part 1: ";
    debug_str += bundle1.debug_;
    irr = bundle1.incorrect_result_reason_;
  }
  if (bundle2.incorrect_result_reason_ != IRR_CORRECT &&
      bundle2.incorrect_result_reason_ != IRR_NO_TRUTH &&
      bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
    debug_str += "Blame from part 2: ";
    debug_str += bundle2.debug_;
    if (irr == IRR_CORRECT) {
      irr = bundle2.incorrect_result_reason_;
    }
    else if (irr != bundle2.incorrect_result_reason_) {
      // The two halves blame different causes: record the cause as unknown.
      irr = IRR_UNKNOWN;
    }
  }
  incorrect_result_reason_ = irr;
  if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) {
    SetBlame(irr, debug_str, NULL, debug);
  }
}
|
||||
|
||||
// If a blob with the same bounding box as one of the truth character
// bounding boxes is not classified as the corresponding truth character
// blames character classifier for incorrect answer.
void BlamerBundle::BlameClassifier(const UNICHARSET& unicharset,
                                   const TBOX& blob_box,
                                   const BLOB_CHOICE_LIST& choices,
                                   bool debug) {
  // Only applicable when per-character truth boxes exist and no blame
  // has been assigned yet.
  if (!truth_has_char_boxes_ ||
      incorrect_result_reason_ != IRR_CORRECT)
    return;  // Nothing to do here.

  for (int b = 0; b < norm_truth_word_.length(); ++b) {
    const TBOX &truth_box = norm_truth_word_.BlobBox(b);
    // Note that we are more strict on the bounding box boundaries here
    // than in other places (chopper, segmentation search), since we do
    // not have the ability to check the previous and next bounding box.
    if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_ / 2)) {
      bool found = false;
      bool incorrect_adapted = false;
      UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID;
      const char *truth_str = truth_text_[b].string();
      // We promise not to modify the list or its contents, using a
      // const BLOB_CHOICE* below.
      BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST*>(&choices));
      for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
           choices_it.forward()) {
        const BLOB_CHOICE* choice = choices_it.data();
        if (strcmp(truth_str, unicharset.get_normed_unichar(
            choice->unichar_id())) == 0) {
          // The truth character is present somewhere in the choice list.
          found = true;
          break;
        }
        else if (choice->IsAdapted()) {
          // An adapted template outranked the truth character so far.
          incorrect_adapted = true;
          incorrect_adapted_id = choice->unichar_id();
        }
      }  // end choices_it for loop
      if (!found) {
        STRING debug_str = "unichar ";
        debug_str += truth_str;
        debug_str += " not found in classification list";
        SetBlame(IRR_CLASSIFIER, debug_str, NULL, debug);
      }
      else if (incorrect_adapted) {
        STRING debug_str = "better rating for adapted ";
        debug_str += unicharset.id_to_unichar(incorrect_adapted_id);
        debug_str += " than for correct ";
        debug_str += truth_str;
        SetBlame(IRR_ADAPTION, debug_str, NULL, debug);
      }
      // Only one truth box can match this blob; stop looking.
      break;
    }
  }  // end iterating over blamer_bundle->norm_truth_word
}
|
||||
|
||||
// Checks whether chops were made at all the character bounding box
|
||||
// boundaries in word->truth_word. If not - blames the chopper for an
|
||||
// incorrect answer.
|
||||
void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) {
|
||||
if (NoTruth() || !truth_has_char_boxes_ ||
|
||||
word->chopped_word->blobs.empty()) {
|
||||
return;
|
||||
}
|
||||
STRING debug_str;
|
||||
bool missing_chop = false;
|
||||
int num_blobs = word->chopped_word->blobs.size();
|
||||
int box_index = 0;
|
||||
int blob_index = 0;
|
||||
inT16 truth_x = -1;
|
||||
while (box_index < truth_word_.length() && blob_index < num_blobs) {
|
||||
truth_x = norm_truth_word_.BlobBox(box_index).right();
|
||||
TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
|
||||
if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) {
|
||||
++blob_index;
|
||||
continue; // encountered an extra chop, keep looking
|
||||
}
|
||||
else if (curr_blob->bounding_box().right() >
|
||||
truth_x + norm_box_tolerance_) {
|
||||
missing_chop = true;
|
||||
break;
|
||||
}
|
||||
else {
|
||||
++blob_index;
|
||||
}
|
||||
}
|
||||
if (missing_chop || box_index < norm_truth_word_.length()) {
|
||||
STRING debug_str;
|
||||
if (missing_chop) {
|
||||
debug_str.add_str_int("Detected missing chop (tolerance=",
|
||||
norm_box_tolerance_);
|
||||
debug_str += ") at Bounding Box=";
|
||||
TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
|
||||
curr_blob->bounding_box().print_to_str(&debug_str);
|
||||
debug_str.add_str_int("\nNo chop for truth at x=", truth_x);
|
||||
}
|
||||
else {
|
||||
debug_str.add_str_int("Missing chops for last ",
|
||||
norm_truth_word_.length() - box_index);
|
||||
debug_str += " truth box(es)";
|
||||
}
|
||||
debug_str += "\nMaximally chopped word boxes:\n";
|
||||
for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
|
||||
TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
|
||||
curr_blob->bounding_box().print_to_str(&debug_str);
|
||||
debug_str += '\n';
|
||||
}
|
||||
debug_str += "Truth bounding boxes:\n";
|
||||
for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
|
||||
norm_truth_word_.BlobBox(box_index).print_to_str(&debug_str);
|
||||
debug_str += '\n';
|
||||
}
|
||||
SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
|
||||
}
|
||||
}
|
||||
|
||||
// Blames the classifier or the language model if, after running only the
|
||||
// chopper, best_choice is incorrect and no blame has been yet set.
|
||||
// Blames the classifier if best_choice is classifier's top choice and is a
|
||||
// dictionary word (i.e. language model could not have helped).
|
||||
// Otherwise, blames the language model (formerly permuter word adjustment).
|
||||
void BlamerBundle::BlameClassifierOrLangModel(
|
||||
const WERD_RES* word,
|
||||
const UNICHARSET& unicharset, bool valid_permuter, bool debug) {
|
||||
if (valid_permuter) {
|
||||
// Find out whether best choice is a top choice.
|
||||
best_choice_is_dict_and_top_choice_ = true;
|
||||
for (int i = 0; i < word->best_choice->length(); ++i) {
|
||||
BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i));
|
||||
ASSERT_HOST(!blob_choice_it.empty());
|
||||
BLOB_CHOICE *first_choice = NULL;
|
||||
for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
|
||||
blob_choice_it.forward()) { // find first non-fragment choice
|
||||
if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) {
|
||||
first_choice = blob_choice_it.data();
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT_HOST(first_choice != NULL);
|
||||
if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) {
|
||||
best_choice_is_dict_and_top_choice_ = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
STRING debug_str;
|
||||
if (best_choice_is_dict_and_top_choice_) {
|
||||
debug_str = "Best choice is: incorrect, top choice, dictionary word";
|
||||
debug_str += " with permuter ";
|
||||
debug_str += word->best_choice->permuter_name();
|
||||
}
|
||||
else {
|
||||
debug_str = "Classifier/Old LM tradeoff is to blame";
|
||||
}
|
||||
SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER
|
||||
: IRR_CLASS_OLD_LM_TRADEOFF,
|
||||
debug_str, word->best_choice, debug);
|
||||
}
|
||||
|
||||
// Sets up the correct_segmentation_* to mark the correct bounding boxes.
|
||||
void BlamerBundle::SetupCorrectSegmentation(const TWERD* word, bool debug) {
|
||||
params_training_bundle_.StartHypothesisList();
|
||||
if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_)
|
||||
return; // Nothing to do here.
|
||||
|
||||
STRING debug_str;
|
||||
debug_str += "Blamer computing correct_segmentation_cols\n";
|
||||
int curr_box_col = 0;
|
||||
int next_box_col = 0;
|
||||
int num_blobs = word->NumBlobs();
|
||||
if (num_blobs == 0) return; // No blobs to play with.
|
||||
int blob_index = 0;
|
||||
inT16 next_box_x = word->blobs[blob_index]->bounding_box().right();
|
||||
for (int truth_idx = 0; blob_index < num_blobs &&
|
||||
truth_idx < norm_truth_word_.length();
|
||||
++blob_index) {
|
||||
++next_box_col;
|
||||
inT16 curr_box_x = next_box_x;
|
||||
if (blob_index + 1 < num_blobs)
|
||||
next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
|
||||
inT16 truth_x = norm_truth_word_.BlobBox(truth_idx).right();
|
||||
debug_str.add_str_int("Box x coord vs. truth: ", curr_box_x);
|
||||
debug_str.add_str_int(" ", truth_x);
|
||||
debug_str += "\n";
|
||||
if (curr_box_x > (truth_x + norm_box_tolerance_)) {
|
||||
break; // failed to find a matching box
|
||||
}
|
||||
else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched
|
||||
(blob_index + 1 >= num_blobs || // next box can't be included
|
||||
next_box_x > truth_x + norm_box_tolerance_)) {
|
||||
correct_segmentation_cols_.push_back(curr_box_col);
|
||||
correct_segmentation_rows_.push_back(next_box_col - 1);
|
||||
++truth_idx;
|
||||
debug_str.add_str_int("col=", curr_box_col);
|
||||
debug_str.add_str_int(" row=", next_box_col - 1);
|
||||
debug_str += "\n";
|
||||
curr_box_col = next_box_col;
|
||||
}
|
||||
}
|
||||
if (blob_index < num_blobs || // trailing blobs
|
||||
correct_segmentation_cols_.length() != norm_truth_word_.length()) {
|
||||
debug_str.add_str_int("Blamer failed to find correct segmentation"
|
||||
" (tolerance=", norm_box_tolerance_);
|
||||
if (blob_index >= num_blobs) debug_str += " blob == NULL";
|
||||
debug_str += ")\n";
|
||||
debug_str.add_str_int(" path length ", correct_segmentation_cols_.length());
|
||||
debug_str.add_str_int(" vs. truth ", norm_truth_word_.length());
|
||||
debug_str += "\n";
|
||||
SetBlame(IRR_UNKNOWN, debug_str, NULL, debug);
|
||||
correct_segmentation_cols_.clear();
|
||||
correct_segmentation_rows_.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if a guided segmentation search is needed.
|
||||
bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const {
|
||||
return incorrect_result_reason_ == IRR_CORRECT &&
|
||||
!segsearch_is_looking_for_blame_ &&
|
||||
truth_has_char_boxes_ &&
|
||||
!ChoiceIsCorrect(best_choice);
|
||||
}
|
||||
|
||||
// Setup ready to guide the segmentation search to the correct segmentation.
|
||||
// The callback pp_cb is used to avoid a cyclic dependency.
|
||||
// It calls into LMPainPoints::GenerateForBlamer by pre-binding the
|
||||
// WERD_RES, and the LMPainPoints itself.
|
||||
// pp_cb must be a permanent callback, and should be deleted by the caller.
|
||||
void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice,
|
||||
MATRIX* ratings, UNICHAR_ID wildcard_id,
|
||||
bool debug, STRING *debug_str,
|
||||
TessResultCallback2<bool, int, int>* cb) {
|
||||
segsearch_is_looking_for_blame_ = true;
|
||||
if (debug) {
|
||||
tprintf("segsearch starting to look for blame\n");
|
||||
}
|
||||
// Fill pain points for any unclassifed blob corresponding to the
|
||||
// correct segmentation state.
|
||||
*debug_str += "Correct segmentation:\n";
|
||||
for (int idx = 0; idx < correct_segmentation_cols_.length(); ++idx) {
|
||||
debug_str->add_str_int("col=", correct_segmentation_cols_[idx]);
|
||||
debug_str->add_str_int(" row=", correct_segmentation_rows_[idx]);
|
||||
*debug_str += "\n";
|
||||
if (!ratings->Classified(correct_segmentation_cols_[idx],
|
||||
correct_segmentation_rows_[idx],
|
||||
wildcard_id) &&
|
||||
!cb->Run(correct_segmentation_cols_[idx],
|
||||
correct_segmentation_rows_[idx])) {
|
||||
segsearch_is_looking_for_blame_ = false;
|
||||
*debug_str += "\nFailed to insert pain point\n";
|
||||
SetBlame(IRR_SEGSEARCH_HEUR, *debug_str, best_choice, debug);
|
||||
break;
|
||||
}
|
||||
} // end for blamer_bundle->correct_segmentation_cols/rows
|
||||
}
|
||||
// Returns true if the guided segsearch is in progress.
|
||||
bool BlamerBundle::GuidedSegsearchStillGoing() const {
|
||||
return segsearch_is_looking_for_blame_;
|
||||
}
|
||||
|
||||
// The segmentation search has ended. Sets the blame appropriately.
|
||||
void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice,
|
||||
bool debug, STRING *debug_str) {
|
||||
// If we are still looking for blame (i.e. best_choice is incorrect, but a
|
||||
// path representing the correct segmentation could be constructed), we can
|
||||
// blame segmentation search pain point prioritization if the rating of the
|
||||
// path corresponding to the correct segmentation is better than that of
|
||||
// best_choice (i.e. language model would have done the correct thing, but
|
||||
// because of poor pain point prioritization the correct segmentation was
|
||||
// never explored). Otherwise we blame the tradeoff between the language model
|
||||
// and the classifier, since even after exploring the path corresponding to
|
||||
// the correct segmentation incorrect best_choice would have been chosen.
|
||||
// One special case when we blame the classifier instead is when best choice
|
||||
// is incorrect, but it is a dictionary word and it classifier's top choice.
|
||||
if (segsearch_is_looking_for_blame_) {
|
||||
segsearch_is_looking_for_blame_ = false;
|
||||
if (best_choice_is_dict_and_top_choice_) {
|
||||
*debug_str = "Best choice is: incorrect, top choice, dictionary word";
|
||||
*debug_str += " with permuter ";
|
||||
*debug_str += best_choice->permuter_name();
|
||||
SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug);
|
||||
}
|
||||
else if (best_correctly_segmented_rating_ <
|
||||
best_choice->rating()) {
|
||||
*debug_str += "Correct segmentation state was not explored";
|
||||
SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug);
|
||||
}
|
||||
else {
|
||||
if (best_correctly_segmented_rating_ >=
|
||||
WERD_CHOICE::kBadRating) {
|
||||
*debug_str += "Correct segmentation paths were pruned by LM\n";
|
||||
}
|
||||
else {
|
||||
debug_str->add_str_double("Best correct segmentation rating ",
|
||||
best_correctly_segmented_rating_);
|
||||
debug_str->add_str_double(" vs. best choice rating ",
|
||||
best_choice->rating());
|
||||
}
|
||||
SetBlame(IRR_CLASS_LM_TRADEOFF, *debug_str, best_choice, debug);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the bundle is null or still does not indicate the correct result,
|
||||
// fix it and use some backup reason for the blame.
|
||||
void BlamerBundle::LastChanceBlame(bool debug, WERD_RES* word) {
|
||||
if (word->blamer_bundle == NULL) {
|
||||
word->blamer_bundle = new BlamerBundle();
|
||||
word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame",
|
||||
word->best_choice, debug);
|
||||
}
|
||||
else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
|
||||
word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth",
|
||||
word->best_choice, debug);
|
||||
}
|
||||
else {
|
||||
bool correct = word->blamer_bundle->ChoiceIsCorrect(word->best_choice);
|
||||
IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_;
|
||||
if (irr == IRR_CORRECT && !correct) {
|
||||
STRING debug_str = "Choice is incorrect after recognition";
|
||||
word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice,
|
||||
debug);
|
||||
}
|
||||
else if (irr != IRR_CORRECT && correct) {
|
||||
if (debug) {
|
||||
tprintf("Corrected %s\n", word->blamer_bundle->debug_.string());
|
||||
}
|
||||
word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT;
|
||||
word->blamer_bundle->debug_ = "";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sets the misadaption debug if this word is incorrect, as this word is
|
||||
// being adapted to.
|
||||
void BlamerBundle::SetMisAdaptionDebug(const WERD_CHOICE *best_choice,
|
||||
bool debug) {
|
||||
if (incorrect_result_reason_ != IRR_NO_TRUTH &&
|
||||
!ChoiceIsCorrect(best_choice)) {
|
||||
misadaption_debug_ = "misadapt to word (";
|
||||
misadaption_debug_ += best_choice->permuter_name();
|
||||
misadaption_debug_ += "): ";
|
||||
FillDebugString("", best_choice, &misadaption_debug_);
|
||||
if (debug) {
|
||||
tprintf("%s\n", misadaption_debug_.string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,333 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: blamer.h
|
||||
// Description: Module allowing precise error causes to be allocated.
|
||||
// Author: Rike Antonova
|
||||
// Refactored: Ray Smith
|
||||
// Created: Mon Feb 04 14:37:01 PST 2013
|
||||
//
|
||||
// (C) Copyright 2013, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_BLAMER_H_
|
||||
#define TESSERACT_CCSTRUCT_BLAMER_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include "boxword.h"
|
||||
#include "genericvector.h"
|
||||
#include "matrix.h"
|
||||
#include "params_training_featdef.h"
|
||||
#include "ratngs.h"
|
||||
#include "strngs.h"
|
||||
#include "tesscallback.h"
|
||||
|
||||
static const inT16 kBlamerBoxTolerance = 5;
|
||||
|
||||
// Enum for expressing the source of error.
|
||||
// Note: Please update kIncorrectResultReasonNames when modifying this enum.
|
||||
enum IncorrectResultReason {
|
||||
// The text recorded in best choice == truth text
|
||||
IRR_CORRECT,
|
||||
// Either: Top choice is incorrect and is a dictionary word (language model
|
||||
// is unlikely to help correct such errors, so blame the classifier).
|
||||
// Or: the correct unichar was not included in shortlist produced by the
|
||||
// classifier at all.
|
||||
IRR_CLASSIFIER,
|
||||
// Chopper have not found one or more splits that correspond to the correct
|
||||
// character bounding boxes recorded in BlamerBundle::truth_word.
|
||||
IRR_CHOPPER,
|
||||
// Classifier did include correct unichars for each blob in the correct
|
||||
// segmentation, however its rating could have been too bad to allow the
|
||||
// language model to pull out the correct choice. On the other hand the
|
||||
// strength of the language model might have been too weak to favor the
|
||||
// correct answer, this we call this case a classifier-language model
|
||||
// tradeoff error.
|
||||
IRR_CLASS_LM_TRADEOFF,
|
||||
// Page layout failed to produce the correct bounding box. Blame page layout
|
||||
// if the truth was not found for the word, which implies that the bounding
|
||||
// box of the word was incorrect (no truth word had a similar bounding box).
|
||||
IRR_PAGE_LAYOUT,
|
||||
// SegSearch heuristic prevented one or more blobs from the correct
|
||||
// segmentation state to be classified (e.g. the blob was too wide).
|
||||
IRR_SEGSEARCH_HEUR,
|
||||
// The correct segmentaiton state was not explored because of poor SegSearch
|
||||
// pain point prioritization. We blame SegSearch pain point prioritization
|
||||
// if the best rating of a choice constructed from correct segmentation is
|
||||
// better than that of the best choice (i.e. if we got to explore the correct
|
||||
// segmentation state, language model would have picked the correct choice).
|
||||
IRR_SEGSEARCH_PP,
|
||||
// Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word,
|
||||
// and thus use the old language model (permuters).
|
||||
// TODO(antonova): integrate the new language mode with chopper
|
||||
IRR_CLASS_OLD_LM_TRADEOFF,
|
||||
// If there is an incorrect adaptive template match with a better score than
|
||||
// a correct one (either pre-trained or adapted), mark this as adaption error.
|
||||
IRR_ADAPTION,
|
||||
// split_and_recog_word() failed to find a suitable split in truth.
|
||||
IRR_NO_TRUTH_SPLIT,
|
||||
// Truth is not available for this word (e.g. when words in corrected content
|
||||
// file are turned into ~~~~ because an appropriate alignment was not found.
|
||||
IRR_NO_TRUTH,
|
||||
// The text recorded in best choice != truth text, but none of the above
|
||||
// reasons are set.
|
||||
IRR_UNKNOWN,
|
||||
|
||||
IRR_NUM_REASONS
|
||||
};
|
||||
|
||||
// Blamer-related information to determine the source of errors.
|
||||
struct BlamerBundle {
|
||||
static const char *IncorrectReasonName(IncorrectResultReason irr);
|
||||
BlamerBundle() : truth_has_char_boxes_(false),
|
||||
incorrect_result_reason_(IRR_CORRECT),
|
||||
lattice_data_(NULL) {
|
||||
ClearResults();
|
||||
}
|
||||
BlamerBundle(const BlamerBundle &other) {
|
||||
this->CopyTruth(other);
|
||||
this->CopyResults(other);
|
||||
}
|
||||
~BlamerBundle() { delete[] lattice_data_; }
|
||||
|
||||
// Accessors.
|
||||
STRING TruthString() const {
|
||||
STRING truth_str;
|
||||
for (int i = 0; i < truth_text_.length(); ++i)
|
||||
truth_str += truth_text_[i];
|
||||
return truth_str;
|
||||
}
|
||||
IncorrectResultReason incorrect_result_reason() const {
|
||||
return incorrect_result_reason_;
|
||||
}
|
||||
bool NoTruth() const {
|
||||
return incorrect_result_reason_ == IRR_NO_TRUTH ||
|
||||
incorrect_result_reason_ == IRR_PAGE_LAYOUT;
|
||||
}
|
||||
bool HasDebugInfo() const {
|
||||
return debug_.length() > 0 || misadaption_debug_.length() > 0;
|
||||
}
|
||||
const STRING& debug() const {
|
||||
return debug_;
|
||||
}
|
||||
const STRING& misadaption_debug() const {
|
||||
return misadaption_debug_;
|
||||
}
|
||||
void UpdateBestRating(float rating) {
|
||||
if (rating < best_correctly_segmented_rating_)
|
||||
best_correctly_segmented_rating_ = rating;
|
||||
}
|
||||
int correct_segmentation_length() const {
|
||||
return correct_segmentation_cols_.length();
|
||||
}
|
||||
// Returns true if the given ratings matrix col,row position is included
|
||||
// in the correct segmentation path at the given index.
|
||||
bool MatrixPositionCorrect(int index, const MATRIX_COORD& coord) {
|
||||
return correct_segmentation_cols_[index] == coord.col &&
|
||||
correct_segmentation_rows_[index] == coord.row;
|
||||
}
|
||||
void set_best_choice_is_dict_and_top_choice(bool value) {
|
||||
best_choice_is_dict_and_top_choice_ = value;
|
||||
}
|
||||
const char* lattice_data() const {
|
||||
return lattice_data_;
|
||||
}
|
||||
int lattice_size() const {
|
||||
return lattice_size_; // size of lattice_data in bytes
|
||||
}
|
||||
void set_lattice_data(const char* data, int size) {
|
||||
lattice_size_ = size;
|
||||
delete[] lattice_data_;
|
||||
lattice_data_ = new char[lattice_size_];
|
||||
memcpy(lattice_data_, data, lattice_size_);
|
||||
}
|
||||
const tesseract::ParamsTrainingBundle& params_training_bundle() const {
|
||||
return params_training_bundle_;
|
||||
}
|
||||
// Adds a new ParamsTrainingHypothesis to the current hypothesis list.
|
||||
void AddHypothesis(const tesseract::ParamsTrainingHypothesis& hypo) {
|
||||
params_training_bundle_.AddHypothesis(hypo);
|
||||
}
|
||||
|
||||
// Functions to setup the blamer.
|
||||
// Whole word string, whole word bounding box.
|
||||
void SetWordTruth(const UNICHARSET& unicharset,
|
||||
const char* truth_str, const TBOX& word_box);
|
||||
// Single "character" string, "character" bounding box.
|
||||
// May be called multiple times to indicate the characters in a word.
|
||||
void SetSymbolTruth(const UNICHARSET& unicharset,
|
||||
const char* char_str, const TBOX& char_box);
|
||||
// Marks that there is something wrong with the truth text, like it contains
|
||||
// reject characters.
|
||||
void SetRejectedTruth();
|
||||
|
||||
// Returns true if the provided word_choice is correct.
|
||||
bool ChoiceIsCorrect(const WERD_CHOICE* word_choice) const;
|
||||
|
||||
void ClearResults() {
|
||||
norm_truth_word_.DeleteAllBoxes();
|
||||
norm_box_tolerance_ = 0;
|
||||
if (!NoTruth()) incorrect_result_reason_ = IRR_CORRECT;
|
||||
debug_ = "";
|
||||
segsearch_is_looking_for_blame_ = false;
|
||||
best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
|
||||
correct_segmentation_cols_.clear();
|
||||
correct_segmentation_rows_.clear();
|
||||
best_choice_is_dict_and_top_choice_ = false;
|
||||
delete[] lattice_data_;
|
||||
lattice_data_ = NULL;
|
||||
lattice_size_ = 0;
|
||||
}
|
||||
void CopyTruth(const BlamerBundle &other) {
|
||||
truth_has_char_boxes_ = other.truth_has_char_boxes_;
|
||||
truth_word_ = other.truth_word_;
|
||||
truth_text_ = other.truth_text_;
|
||||
incorrect_result_reason_ =
|
||||
(other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT);
|
||||
}
|
||||
void CopyResults(const BlamerBundle &other) {
|
||||
norm_truth_word_ = other.norm_truth_word_;
|
||||
norm_box_tolerance_ = other.norm_box_tolerance_;
|
||||
incorrect_result_reason_ = other.incorrect_result_reason_;
|
||||
segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
|
||||
best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
|
||||
correct_segmentation_cols_ = other.correct_segmentation_cols_;
|
||||
correct_segmentation_rows_ = other.correct_segmentation_rows_;
|
||||
best_choice_is_dict_and_top_choice_ =
|
||||
other.best_choice_is_dict_and_top_choice_;
|
||||
if (other.lattice_data_ != NULL) {
|
||||
lattice_data_ = new char[other.lattice_size_];
|
||||
memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);
|
||||
lattice_size_ = other.lattice_size_;
|
||||
}
|
||||
else {
|
||||
lattice_data_ = NULL;
|
||||
}
|
||||
}
|
||||
const char *IncorrectReason() const;
|
||||
|
||||
// Appends choice and truth details to the given debug string.
|
||||
void FillDebugString(const STRING &msg, const WERD_CHOICE *choice,
|
||||
STRING *debug);
|
||||
|
||||
// Sets up the norm_truth_word from truth_word using the given DENORM.
|
||||
void SetupNormTruthWord(const DENORM& denorm);
|
||||
|
||||
// Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
|
||||
// bundles) where the right edge/ of the left-hand word is word1_right,
|
||||
// and the left edge of the right-hand word is word2_left.
|
||||
void SplitBundle(int word1_right, int word2_left, bool debug,
|
||||
BlamerBundle* bundle1, BlamerBundle* bundle2) const;
|
||||
// "Joins" the blames from bundle1 and bundle2 into *this.
|
||||
void JoinBlames(const BlamerBundle& bundle1, const BlamerBundle& bundle2,
|
||||
bool debug);
|
||||
|
||||
// If a blob with the same bounding box as one of the truth character
|
||||
// bounding boxes is not classified as the corresponding truth character
|
||||
// blames character classifier for incorrect answer.
|
||||
void BlameClassifier(const UNICHARSET& unicharset,
|
||||
const TBOX& blob_box,
|
||||
const BLOB_CHOICE_LIST& choices,
|
||||
bool debug);
|
||||
|
||||
|
||||
// Checks whether chops were made at all the character bounding box
|
||||
// boundaries in word->truth_word. If not - blames the chopper for an
|
||||
// incorrect answer.
|
||||
void SetChopperBlame(const WERD_RES* word, bool debug);
|
||||
// Blames the classifier or the language model if, after running only the
|
||||
// chopper, best_choice is incorrect and no blame has been yet set.
|
||||
// Blames the classifier if best_choice is classifier's top choice and is a
|
||||
// dictionary word (i.e. language model could not have helped).
|
||||
// Otherwise, blames the language model (formerly permuter word adjustment).
|
||||
void BlameClassifierOrLangModel(
|
||||
const WERD_RES* word,
|
||||
const UNICHARSET& unicharset, bool valid_permuter, bool debug);
|
||||
// Sets up the correct_segmentation_* to mark the correct bounding boxes.
|
||||
void SetupCorrectSegmentation(const TWERD* word, bool debug);
|
||||
|
||||
// Returns true if a guided segmentation search is needed.
|
||||
bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const;
|
||||
// Setup ready to guide the segmentation search to the correct segmentation.
|
||||
// The callback pp_cb is used to avoid a cyclic dependency.
|
||||
// It calls into LMPainPoints::GenerateForBlamer by pre-binding the
|
||||
// WERD_RES, and the LMPainPoints itself.
|
||||
// pp_cb must be a permanent callback, and should be deleted by the caller.
|
||||
void InitForSegSearch(const WERD_CHOICE *best_choice,
|
||||
MATRIX* ratings, UNICHAR_ID wildcard_id,
|
||||
bool debug, STRING *debug_str,
|
||||
TessResultCallback2<bool, int, int>* pp_cb);
|
||||
// Returns true if the guided segsearch is in progress.
|
||||
bool GuidedSegsearchStillGoing() const;
|
||||
// The segmentation search has ended. Sets the blame appropriately.
|
||||
void FinishSegSearch(const WERD_CHOICE *best_choice,
|
||||
bool debug, STRING *debug_str);
|
||||
|
||||
// If the bundle is null or still does not indicate the correct result,
|
||||
// fix it and use some backup reason for the blame.
|
||||
static void LastChanceBlame(bool debug, WERD_RES* word);
|
||||
|
||||
// Sets the misadaption debug if this word is incorrect, as this word is
|
||||
// being adapted to.
|
||||
void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug);
|
||||
|
||||
private:
|
||||
void SetBlame(IncorrectResultReason irr, const STRING &msg,
|
||||
const WERD_CHOICE *choice, bool debug) {
|
||||
incorrect_result_reason_ = irr;
|
||||
debug_ = IncorrectReason();
|
||||
debug_ += " to blame: ";
|
||||
FillDebugString(msg, choice, &debug_);
|
||||
if (debug) tprintf("SetBlame(): %s", debug_.string());
|
||||
}
|
||||
|
||||
private:
|
||||
// Set to true when bounding boxes for individual unichars are recorded.
|
||||
bool truth_has_char_boxes_;
|
||||
// The true_word (in the original image coordinate space) contains ground
|
||||
// truth bounding boxes for this WERD_RES.
|
||||
tesseract::BoxWord truth_word_;
|
||||
// Same as above, but in normalized coordinates
|
||||
// (filled in by WERD_RES::SetupForRecognition()).
|
||||
tesseract::BoxWord norm_truth_word_;
|
||||
// Tolerance for bounding box comparisons in normalized space.
|
||||
int norm_box_tolerance_;
|
||||
// Contains ground truth unichar for each of the bounding boxes in truth_word.
|
||||
GenericVector<STRING> truth_text_;
|
||||
// The reason for incorrect OCR result.
|
||||
IncorrectResultReason incorrect_result_reason_;
|
||||
// Debug text associated with the blame.
|
||||
STRING debug_;
|
||||
// Misadaption debug information (filled in if this word was misadapted to).
|
||||
STRING misadaption_debug_;
|
||||
// Variables used by the segmentation search when looking for the blame.
|
||||
// Set to true while segmentation search is continued after the usual
|
||||
// termination condition in order to look for the blame.
|
||||
bool segsearch_is_looking_for_blame_;
|
||||
// Best rating for correctly segmented path
|
||||
// (set and used by SegSearch when looking for blame).
|
||||
float best_correctly_segmented_rating_;
|
||||
// Vectors populated by SegSearch to indicate column and row indices that
|
||||
// correspond to blobs with correct bounding boxes.
|
||||
GenericVector<int> correct_segmentation_cols_;
|
||||
GenericVector<int> correct_segmentation_rows_;
|
||||
// Set to true if best choice is a dictionary word and
|
||||
// classifier's top choice.
|
||||
bool best_choice_is_dict_and_top_choice_;
|
||||
// Serialized segmentation search lattice.
|
||||
char *lattice_data_;
|
||||
int lattice_size_; // size of lattice_data in bytes
|
||||
// Information about hypotheses (paths) explored by the segmentation search.
|
||||
tesseract::ParamsTrainingBundle params_training_bundle_;
|
||||
};
|
||||
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_BLAMER_H_
|
|
@ -0,0 +1,29 @@
|
|||
/**********************************************************************
|
||||
* File: blckerr.h (Formerly blockerr.h)
|
||||
* Description: Error codes for the page block classes.
|
||||
* Author: Ray Smith
|
||||
* Created: Tue Mar 19 17:43:30 GMT 1991
|
||||
*
|
||||
* (C) Copyright 1991, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef BLCKERR_H
|
||||
#define BLCKERR_H
|
||||
|
||||
#include "errcode.h"
|
||||
|
||||
const ERRCODE BADBLOCKLINE = "Y coordinate in block out of bounds";
|
||||
const ERRCODE LOSTBLOCKLINE = "Can't find rectangle for line";
|
||||
const ERRCODE ILLEGAL_GRADIENT = "Gradient wrong side of edge step!";
|
||||
const ERRCODE WRONG_WORD = "Word doesn't have blobs of that type";
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,847 @@
|
|||
/**********************************************************************
|
||||
* File: blobbox.h (Formerly blobnbox.h)
|
||||
* Description: Code for the textord blob class.
|
||||
* Author: Ray Smith
|
||||
* Created: Thu Jul 30 09:08:51 BST 1992
|
||||
*
|
||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef BLOBBOX_H
|
||||
#define BLOBBOX_H
|
||||
|
||||
#include "clst.h"
|
||||
#include "elst2.h"
|
||||
#include "werd.h"
|
||||
#include "ocrblock.h"
|
||||
#include "statistc.h"
|
||||
|
||||
// Classification of the character pitch (spacing model) of a row/block.
enum PITCH_TYPE
{
  PITCH_DUNNO,        // insufficient data
  PITCH_MAYBE_FIXED,  // could be
  PITCH_DEF_PROP,     // definitely proportional (by symmetry with PITCH_DEF_FIXED)
  PITCH_MAYBE_PROP,   // could be proportional
  PITCH_CORR_FIXED,   // NOTE(review): presumably "corrected to fixed" — confirm
  PITCH_CORR_PROP     // NOTE(review): presumably "corrected to proportional" — confirm
};
|
||||
|
||||
// The possible tab-stop types of each side of a BLOBNBOX.
// The ordering is important, as it is used for deleting dead-ends in the
// search. ALIGNED, CONFIRMED and VLINE should remain greater than the
// non-aligned, unset, or deleted members.
enum TabType {
  TT_NONE,           // Not a tab.
  TT_DELETED,        // Not a tab after detailed analysis.
  TT_MAYBE_RAGGED,   // Initial designation of a tab-stop candidate.
  TT_MAYBE_ALIGNED,  // Initial designation of a tab-stop candidate.
  TT_CONFIRMED,      // Aligned with neighbours.
  TT_VLINE           // Detected as a vertical line.
};
|
||||
|
||||
// The possible region types of a BLOBNBOX.
// Note: keep all the text types > BRT_UNKNOWN and all the image types less.
// Keep in sync with kBlobTypes in colpartition.cpp and BoxColor, and the
// *Type static functions below.
enum BlobRegionType {
  BRT_NOISE,      // Neither text nor image.
  BRT_HLINE,      // Horizontal separator line.
  BRT_VLINE,      // Vertical separator line.
  BRT_RECTIMAGE,  // Rectangular image.
  BRT_POLYIMAGE,  // Non-rectangular image.
  BRT_UNKNOWN,    // Not determined yet.
  BRT_VERT_TEXT,  // Vertical alignment, not necessarily vertically oriented.
  BRT_TEXT,       // Convincing text.

  BRT_COUNT       // Number of possibilities.
};
|
||||
|
||||
// enum for elements of arrays that refer to neighbours.
// NOTE: keep in this order, so ^2 can be used to flip direction
// (LEFT<->RIGHT, BELOW<->ABOVE — see DirOtherWay below).
enum BlobNeighbourDir {
  BND_LEFT,
  BND_BELOW,
  BND_RIGHT,
  BND_ABOVE,
  BND_COUNT
};
|
||||
|
||||
// enum for special type of text characters, such as math symbol or italic.
enum BlobSpecialTextType {
  BSTT_NONE,     // No special.
  BSTT_ITALIC,   // Italic style.
  BSTT_DIGIT,    // Digit symbols.
  BSTT_MATH,     // Mathematical symbols (not including digits).
  BSTT_UNCLEAR,  // Characters with low recognition rate.
  BSTT_SKIP,     // Characters that we skip labeling (usually too small).
  BSTT_COUNT
};
|
||||
|
||||
// Returns the opposite of the given neighbour direction.
// Relies on the declared ordering of BlobNeighbourDir: XOR with 2
// swaps BND_LEFT<->BND_RIGHT and BND_BELOW<->BND_ABOVE.
inline BlobNeighbourDir DirOtherWay(BlobNeighbourDir dir) {
  const int flipped = static_cast<int>(dir) ^ 2;
  return static_cast<BlobNeighbourDir>(flipped);
}
|
||||
|
||||
// BlobTextFlowType indicates the quality of neighbouring information
// related to a chain of connected components, either horizontally or
// vertically. Also used by ColPartition for the collection of blobs
// within, which should all have the same value in most cases.
enum BlobTextFlowType {
  BTFT_NONE,           // No text flow set yet.
  BTFT_NONTEXT,        // Flow too poor to be likely text.
  BTFT_NEIGHBOURS,     // Neighbours support flow in this direction.
  BTFT_CHAIN,          // There is a weak chain of text in this direction.
  BTFT_STRONG_CHAIN,   // There is a strong chain of text in this direction.
  BTFT_TEXT_ON_IMAGE,  // There is a strong chain of text on an image.
  BTFT_LEADER,         // Leader dots/dashes etc.
  BTFT_COUNT
};
|
||||
|
||||
// Returns true if type1 dominates type2 in a merge. Mostly determined by the
|
||||
// ordering of the enum, LEADER is weak and dominates nothing.
|
||||
// The function is anti-symmetric (t1 > t2) === !(t2 > t1), except that
|
||||
// this cannot be true if t1 == t2, so the result is undefined.
|
||||
inline bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2) {
|
||||
// LEADER always loses.
|
||||
if (type1 == BTFT_LEADER) return false;
|
||||
if (type2 == BTFT_LEADER) return true;
|
||||
// With those out of the way, the ordering of the enum determines the result.
|
||||
return type1 >= type2;
|
||||
}
|
||||
|
||||
namespace tesseract {
|
||||
class ColPartition;
|
||||
}
|
||||
|
||||
class BLOBNBOX;
ELISTIZEH(BLOBNBOX)

// A BLOBNBOX wraps a connected-component blob (C_BLOB) with its bounding
// box plus the layout-analysis state accumulated by textord: tab-stop types,
// region/flow classification, rule-line coordinates, stroke widths,
// neighbours in the four directions, and diacritic base-character data.
// Ownership: deletes cblob_ptr in the destructor only when owns_cblob_ is set.
class BLOBNBOX :public ELIST_LINK
{
 public:
  BLOBNBOX() {
    ConstructionInit();
  }
  // Takes (non-owning by default) a pointer to srcblob; box/area are
  // computed from it. Call set_owns_cblob(true) to transfer ownership.
  explicit BLOBNBOX(C_BLOB *srcblob) {
    box = srcblob->bounding_box();
    ConstructionInit();
    cblob_ptr = srcblob;
    area = static_cast<int>(srcblob->area());
  }
  ~BLOBNBOX() {
    if (owns_cblob_) delete cblob_ptr;
  }
  // Makes a new C_BLOB from the outline and wraps it in a new BLOBNBOX.
  // NOTE(review): the new C_BLOB is heap-allocated but owns_cblob_ stays
  // false (see ConstructionInit) — caller/ownership convention to confirm.
  static BLOBNBOX* RealBlob(C_OUTLINE* outline) {
    C_BLOB* blob = new C_BLOB(outline);
    return new BLOBNBOX(blob);
  }

  // Rotates the box and the underlying blob.
  void rotate(FCOORD rotation);

  // Methods that act on the box without touching the underlying blob.
  // Reflect the box in the y-axis, leaving the underlying blob untouched.
  void reflect_box_in_y_axis();
  // Rotates the box by the angle given by rotation.
  // If the blob is a diacritic, then only small rotations for skew
  // correction can be applied.
  void rotate_box(FCOORD rotation);
  // Moves just the box by the given vector.
  void translate_box(ICOORD v) {
    if (IsDiacritic()) {
      // Keep the diacritic's base-char band moving with the box.
      box.move(v);
      base_char_top_ += v.y();
      base_char_bottom_ += v.y();
    }
    else {
      // Non-diacritic: re-sync base char top/bottom to the moved box.
      box.move(v);
      set_diacritic_box(box);
    }
  }
  void merge(BLOBNBOX *nextblob);
  void really_merge(BLOBNBOX* other);
  void chop(                        // fake chop blob
      BLOBNBOX_IT *start_it,        // location of this
      BLOBNBOX_IT *blob_it,         // iterator
      FCOORD rotation,              // for landscape
      float xheight);               // line height

  void NeighbourGaps(int gaps[BND_COUNT]) const;
  void MinMaxGapsClipped(int* h_min, int* h_max,
                         int* v_min, int* v_max) const;
  void CleanNeighbours();
  // Returns positive if there is at least one side neighbour that has a
  // similar stroke width and is not on the other side of a rule line.
  int GoodTextBlob() const;
  // Returns the number of side neighbours that are of type BRT_NOISE.
  int NoisyNeighbours() const;

  // Returns true if the blob is noise and has no owner.
  bool DeletableNoise() const {
    return owner() == NULL && region_type() == BRT_NOISE;
  }

  // Returns true, and sets vert_possible/horz_possible if the blob has some
  // feature that makes it individually appear to flow one way.
  // eg if it has a high aspect ratio, yet has a complex shape, such as a
  // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1.
  bool DefiniteIndividualFlow();

  // Returns true if there is no tabstop violation in merging this and other.
  bool ConfirmNoTabViolation(const BLOBNBOX& other) const;

  // Returns true if other has a similar stroke width to this.
  bool MatchingStrokeWidth(const BLOBNBOX& other,
                           double fractional_tolerance,
                           double constant_tolerance) const;

  // Returns a bounding box of the outline contained within the
  // given horizontal range.
  TBOX BoundsWithinLimits(int left, int right);

  // Estimates and stores the baseline position based on the shape of the
  // outline.
  void EstimateBaselinePosition();

  // Simple accessors.
  const TBOX& bounding_box() const {
    return box;
  }
  // Set the bounding box. Use with caution.
  // Normally use compute_bounding_box instead.
  void set_bounding_box(const TBOX& new_box) {
    box = new_box;
    base_char_top_ = box.top();
    base_char_bottom_ = box.bottom();
  }
  // Recomputes box (and base char band / baseline) from the wrapped C_BLOB.
  void compute_bounding_box() {
    box = cblob_ptr->bounding_box();
    base_char_top_ = box.top();
    base_char_bottom_ = box.bottom();
    baseline_y_ = box.bottom();
  }
  const TBOX& reduced_box() const {
    return red_box;
  }
  void set_reduced_box(TBOX new_box) {
    red_box = new_box;
    reduced = TRUE;
  }
  inT32 enclosed_area() const {
    return area;
  }
  bool joined_to_prev() const {
    return joined != 0;
  }
  bool red_box_set() const {
    return reduced != 0;
  }
  int repeated_set() const {
    return repeated_set_;
  }
  void set_repeated_set(int set_id) {
    repeated_set_ = set_id;
  }
  C_BLOB *cblob() const {
    return cblob_ptr;
  }
  TabType left_tab_type() const {
    return left_tab_type_;
  }
  void set_left_tab_type(TabType new_type) {
    left_tab_type_ = new_type;
  }
  TabType right_tab_type() const {
    return right_tab_type_;
  }
  void set_right_tab_type(TabType new_type) {
    right_tab_type_ = new_type;
  }
  BlobRegionType region_type() const {
    return region_type_;
  }
  void set_region_type(BlobRegionType new_type) {
    region_type_ = new_type;
  }
  BlobSpecialTextType special_text_type() const {
    return spt_type_;
  }
  void set_special_text_type(BlobSpecialTextType new_type) {
    spt_type_ = new_type;
  }
  BlobTextFlowType flow() const {
    return flow_;
  }
  void set_flow(BlobTextFlowType value) {
    flow_ = value;
  }
  bool vert_possible() const {
    return vert_possible_;
  }
  void set_vert_possible(bool value) {
    vert_possible_ = value;
  }
  bool horz_possible() const {
    return horz_possible_;
  }
  void set_horz_possible(bool value) {
    horz_possible_ = value;
  }
  int left_rule() const {
    return left_rule_;
  }
  void set_left_rule(int new_left) {
    left_rule_ = new_left;
  }
  int right_rule() const {
    return right_rule_;
  }
  void set_right_rule(int new_right) {
    right_rule_ = new_right;
  }
  int left_crossing_rule() const {
    return left_crossing_rule_;
  }
  void set_left_crossing_rule(int new_left) {
    left_crossing_rule_ = new_left;
  }
  int right_crossing_rule() const {
    return right_crossing_rule_;
  }
  void set_right_crossing_rule(int new_right) {
    right_crossing_rule_ = new_right;
  }
  float horz_stroke_width() const {
    return horz_stroke_width_;
  }
  void set_horz_stroke_width(float width) {
    horz_stroke_width_ = width;
  }
  float vert_stroke_width() const {
    return vert_stroke_width_;
  }
  void set_vert_stroke_width(float width) {
    vert_stroke_width_ = width;
  }
  float area_stroke_width() const {
    return area_stroke_width_;
  }
  tesseract::ColPartition* owner() const {
    return owner_;
  }
  void set_owner(tesseract::ColPartition* new_owner) {
    owner_ = new_owner;
  }
  bool leader_on_left() const {
    return leader_on_left_;
  }
  void set_leader_on_left(bool flag) {
    leader_on_left_ = flag;
  }
  bool leader_on_right() const {
    return leader_on_right_;
  }
  void set_leader_on_right(bool flag) {
    leader_on_right_ = flag;
  }
  BLOBNBOX* neighbour(BlobNeighbourDir n) const {
    return neighbours_[n];
  }
  bool good_stroke_neighbour(BlobNeighbourDir n) const {
    return good_stroke_neighbours_[n];
  }
  void set_neighbour(BlobNeighbourDir n, BLOBNBOX* neighbour, bool good) {
    neighbours_[n] = neighbour;
    good_stroke_neighbours_[n] = good;
  }
  // A blob is a diacritic iff its base-char band differs from its own box.
  bool IsDiacritic() const {
    return base_char_top_ != box.top() || base_char_bottom_ != box.bottom();
  }
  int base_char_top() const {
    return base_char_top_;
  }
  int base_char_bottom() const {
    return base_char_bottom_;
  }
  int baseline_position() const {
    return baseline_y_;
  }
  int line_crossings() const {
    return line_crossings_;
  }
  void set_line_crossings(int value) {
    line_crossings_ = value;
  }
  void set_diacritic_box(const TBOX& diacritic_box) {
    base_char_top_ = diacritic_box.top();
    base_char_bottom_ = diacritic_box.bottom();
  }
  BLOBNBOX* base_char_blob() const {
    return base_char_blob_;
  }
  void set_base_char_blob(BLOBNBOX* blob) {
    base_char_blob_ = blob;
  }
  void set_owns_cblob(bool value) { owns_cblob_ = value; }

  bool UniquelyVertical() const {
    return vert_possible_ && !horz_possible_;
  }
  bool UniquelyHorizontal() const {
    return horz_possible_ && !vert_possible_;
  }

  // Returns true if the region type is text.
  static bool IsTextType(BlobRegionType type) {
    return type == BRT_TEXT || type == BRT_VERT_TEXT;
  }
  // Returns true if the region type is image.
  static bool IsImageType(BlobRegionType type) {
    return type == BRT_RECTIMAGE || type == BRT_POLYIMAGE;
  }
  // Returns true if the region type is line.
  static bool IsLineType(BlobRegionType type) {
    return type == BRT_HLINE || type == BRT_VLINE;
  }
  // Returns true if the region type cannot be merged.
  static bool UnMergeableType(BlobRegionType type) {
    return IsLineType(type) || IsImageType(type);
  }
  // Helper to call CleanNeighbours on all blobs on the list.
  static void CleanNeighbours(BLOBNBOX_LIST* blobs);
  // Helper to delete all the deletable blobs on the list.
  static void DeleteNoiseBlobs(BLOBNBOX_LIST* blobs);
  // Helper to compute edge offsets for all the blobs on the list.
  // See coutln.h for an explanation of edge offsets.
  static void ComputeEdgeOffsets(Pix* thresholds, Pix* grey,
                                 BLOBNBOX_LIST* blobs);

#ifndef GRAPHICS_DISABLED
  // Helper to draw all the blobs on the list in the given body_colour,
  // with child outlines in the child_colour.
  static void PlotBlobs(BLOBNBOX_LIST* list,
                        ScrollView::Color body_colour,
                        ScrollView::Color child_colour,
                        ScrollView* win);
  // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
  // given list in the given body_colour, with child outlines in the
  // child_colour.
  static void PlotNoiseBlobs(BLOBNBOX_LIST* list,
                             ScrollView::Color body_colour,
                             ScrollView::Color child_colour,
                             ScrollView* win);

  static ScrollView::Color TextlineColor(BlobRegionType region_type,
                                         BlobTextFlowType flow_type);

  // Keep in sync with BlobRegionType.
  ScrollView::Color BoxColor() const;

  void plot(ScrollView* window,               // window to draw in
            ScrollView::Color blob_colour,    // for outer bits
            ScrollView::Color child_colour);  // for holes
#endif

  // Initializes the bulk of the members to default values for use at
  // construction time.
  void ConstructionInit() {
    cblob_ptr = NULL;
    owns_cblob_ = false;
    area = 0;
    area_stroke_width_ = 0.0f;
    horz_stroke_width_ = 0.0f;
    vert_stroke_width_ = 0.0f;
    ReInit();
  }
  // Initializes members set by StrokeWidth and beyond, without discarding
  // stored area and strokewidth values, which are expensive to calculate.
  void ReInit() {
    joined = false;
    reduced = false;
    repeated_set_ = 0;
    left_tab_type_ = TT_NONE;
    right_tab_type_ = TT_NONE;
    region_type_ = BRT_UNKNOWN;
    flow_ = BTFT_NONE;
    spt_type_ = BSTT_SKIP;
    left_rule_ = 0;
    right_rule_ = 0;
    left_crossing_rule_ = 0;
    right_crossing_rule_ = 0;
    // Lazily derive stroke width from area/perimeter if not yet computed.
    if (area_stroke_width_ == 0.0f && area > 0 && cblob() != NULL)
      area_stroke_width_ = 2.0f * area / cblob()->perimeter();
    owner_ = NULL;
    base_char_top_ = box.top();
    base_char_bottom_ = box.bottom();
    baseline_y_ = box.bottom();
    line_crossings_ = 0;
    base_char_blob_ = NULL;
    horz_possible_ = false;
    vert_possible_ = false;
    leader_on_left_ = false;
    leader_on_right_ = false;
    ClearNeighbours();
  }

  void ClearNeighbours() {
    for (int n = 0; n < BND_COUNT; ++n) {
      neighbours_[n] = NULL;
      good_stroke_neighbours_[n] = false;
    }
  }

 private:
  C_BLOB *cblob_ptr;            // edgestep blob
  TBOX box;                     // bounding box
  TBOX red_box;                 // bounding box
  int area : 30;                // enclosed area
  int joined : 1;               // joined to prev
  int reduced : 1;              // reduced box set
  int repeated_set_;            // id of the set of repeated blobs
  TabType left_tab_type_;       // Indicates tab-stop assessment
  TabType right_tab_type_;      // Indicates tab-stop assessment
  BlobRegionType region_type_;  // Type of region this blob belongs to
  BlobTextFlowType flow_;       // Quality of text flow.
  inT16 left_rule_;             // x-coord of nearest but not crossing rule line
  inT16 right_rule_;            // x-coord of nearest but not crossing rule line
  inT16 left_crossing_rule_;    // x-coord of nearest or crossing rule line
  inT16 right_crossing_rule_;   // x-coord of nearest or crossing rule line
  inT16 base_char_top_;         // y-coord of top/bottom of diacritic base,
  inT16 base_char_bottom_;      // if it exists else top/bottom of this blob.
  inT16 baseline_y_;            // Estimate of baseline position.
  int line_crossings_;          // Number of line intersections touched.
  BLOBNBOX* base_char_blob_;    // The blob that was the base char.
  float horz_stroke_width_;     // Median horizontal stroke width
  float vert_stroke_width_;     // Median vertical stroke width
  float area_stroke_width_;     // Stroke width from area/perimeter ratio.
  tesseract::ColPartition* owner_;  // Who will delete me when I am not needed
  BlobSpecialTextType spt_type_;    // Special text type.
  BLOBNBOX* neighbours_[BND_COUNT];
  bool good_stroke_neighbours_[BND_COUNT];
  bool horz_possible_;          // Could be part of horizontal flow.
  bool vert_possible_;          // Could be part of vertical flow.
  bool leader_on_left_;         // There is a leader to the left.
  bool leader_on_right_;        // There is a leader to the right.
  // Iff true, then the destructor should delete the cblob_ptr.
  // TODO(rays) migrate all uses to correctly setting this flag instead of
  // deleting the C_BLOB before deleting the BLOBNBOX.
  bool owns_cblob_;
};
|
||||
|
||||
// A TO_ROW holds the blobs of one candidate text row during textord,
// together with the fitted baseline (y = m*x + c and a gradient-constrained
// parallel fit), pitch decision, and spacing statistics.
class TO_ROW : public ELIST2_LINK
{
 public:
  // Weight applied to the fit error when computing credibility
  // (see set_parallel_line).
  static const int kErrorWeight = 3;

  TO_ROW() {
    clear();
  }  // empty
  TO_ROW(              // constructor
      BLOBNBOX *blob,  // from first blob
      float top,       // of row  // target height
      float bottom,
      float row_size);

  void print() const;
  float max_y() const {  // access function
    return y_max;
  }
  float min_y() const {
    return y_min;
  }
  float mean_y() const {
    return (y_min + y_max) / 2.0f;
  }
  float initial_min_y() const {
    return initial_y_min;
  }
  float line_m() const {  // access to line fit
    return m;
  }
  float line_c() const {
    return c;
  }
  float line_error() const {
    return error;
  }
  float parallel_c() const {
    return para_c;
  }
  float parallel_error() const {
    return para_error;
  }
  float believability() const {  // baseline goodness
    return credibility;
  }
  float intercept() const {  // real parallel_c
    return y_origin;
  }
  void add_blob(         // put in row
      BLOBNBOX *blob,    // blob to add
      float top,         // of row  // target height
      float bottom,
      float row_size);
  void insert_blob(      // put in row in order
      BLOBNBOX *blob);

  BLOBNBOX_LIST *blob_list() {  // get list
    return &blobs;
  }

  void set_line(       // set line spec
      float new_m,     // line to set
      float new_c,
      float new_error) {
    m = new_m;
    c = new_c;
    error = new_error;
  }
  // Sets the gradient-constrained fit; credibility rewards blob count and
  // penalizes fit error by kErrorWeight.
  void set_parallel_line(  // set fixed gradient line
      float gradient,      // page gradient
      float new_c,
      float new_error) {
    para_c = new_c;
    para_error = new_error;
    credibility =
      (float)(blobs.length() - kErrorWeight * new_error);
    y_origin = (float)(new_c / sqrt(1 + gradient * gradient));
    // real intercept
  }
  void set_limits(      // set min,max
      float new_min,    // bottom and
      float new_max) {  // top of row
    y_min = new_min;
    y_max = new_max;
  }
  void compute_vertical_projection();
  // get projection

  // num_repeated_sets_ == -1 means repeated-blob search has not run yet.
  bool rep_chars_marked() const {
    return num_repeated_sets_ != -1;
  }
  void clear_rep_chars_marked() {
    num_repeated_sets_ = -1;
  }
  int num_repeated_sets() const {
    return num_repeated_sets_;
  }
  void set_num_repeated_sets(int num_sets) {
    num_repeated_sets_ = num_sets;
  }

  // true when dead
  BOOL8 merged;
  BOOL8 all_caps;             // had no ascenders
  BOOL8 used_dm_model;        // in guessing pitch
  inT16 projection_left;      // start of projection
  inT16 projection_right;     // start of projection
  PITCH_TYPE pitch_decision;  // how strong is decision
  float fixed_pitch;          // pitch or 0
  float fp_space;             // sp if fixed pitch
  float fp_nonsp;             // nonsp if fixed pitch
  float pr_space;             // sp if prop
  float pr_nonsp;             // non sp if prop
  float spacing;              // to "next" row
  float xheight;              // of line
  int xheight_evidence;       // number of blobs of height xheight
  float ascrise;              // ascenders
  float descdrop;             // descenders
  float body_size;            // of CJK characters. Assumed to be
                              // xheight+ascrise for non-CJK text.
  inT32 min_space;            // min size for real space
  inT32 max_nonspace;         // max size of non-space
  inT32 space_threshold;      // space vs nonspace
  float kern_size;            // average non-space
  float space_size;           // average space
  WERD_LIST rep_words;        // repeated chars
  ICOORDELT_LIST char_cells;  // fixed pitch cells
  QSPLINE baseline;           // curved baseline
  STATS projection;           // vertical projection

 private:
  void clear();  // clear all values to reasonable defaults

  BLOBNBOX_LIST blobs;     // blobs in row
  float y_min;             // coords
  float y_max;
  float initial_y_min;
  float m, c;              // line spec
  float error;             // line error
  float para_c;            // constrained fit
  float para_error;
  float y_origin;          // rotated para_c;
  float credibility;       // baseline believability
  int num_repeated_sets_;  // number of sets of repeated blobs
                           // set to -1 if we have not searched
                           // for repeated blobs in this row yet
};

ELIST2IZEH(TO_ROW)
|
||||
// A TO_BLOCK is the textord working copy of a page BLOCK: it buckets the
// block's blobs by size class (noise/small/medium/large/underlines), holds
// the candidate rows, and carries block-level pitch/spacing estimates.
class TO_BLOCK :public ELIST_LINK
{
 public:
  TO_BLOCK() : pitch_decision(PITCH_DUNNO) {
    clear();
  }  // empty
  TO_BLOCK(               // constructor
      BLOCK *src_block);  // real block
  ~TO_BLOCK();

  void clear();  // clear all scalar members.

  TO_ROW_LIST *get_rows() {  // access function
    return &row_list;
  }

  // Rotate all the blobnbox lists and the underlying block. Then update the
  // median size statistic from the blobs list.
  void rotate(const FCOORD& rotation) {
    // NULL-terminated array of every blob list this block owns.
    BLOBNBOX_LIST* blobnbox_list[] = { &blobs, &underlines, &noise_blobs,
                                       &small_blobs, &large_blobs, NULL };
    for (BLOBNBOX_LIST** list = blobnbox_list; *list != NULL; ++list) {
      BLOBNBOX_IT it(*list);
      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
        it.data()->rotate(rotation);
      }
    }
    // Rotate the block
    ASSERT_HOST(block->poly_block() != NULL);
    block->rotate(rotation);
    // Update the median size statistic from the blobs list.
    STATS widths(0, block->bounding_box().width());
    STATS heights(0, block->bounding_box().height());
    BLOBNBOX_IT blob_it(&blobs);
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
      widths.add(blob_it.data()->bounding_box().width(), 1);
      heights.add(blob_it.data()->bounding_box().height(), 1);
    }
    block->set_median_size(static_cast<int>(widths.median() + 0.5),
                           static_cast<int>(heights.median() + 0.5));
  }

  void print_rows() {  // debug info
    TO_ROW_IT row_it = &row_list;
    TO_ROW *row;

    for (row_it.mark_cycle_pt(); !row_it.cycled_list();
         row_it.forward()) {
      row = row_it.data();
      tprintf("Row range (%g,%g), para_c=%g, blobcount=" INT32FORMAT
              "\n", row->min_y(), row->max_y(), row->parallel_c(),
              row->blob_list()->length());
    }
  }

  // Reorganizes the blob lists with a different definition of small, medium
  // and large, compared to the original definition.
  // Height is still the primary filter key, but medium width blobs of small
  // height become medium, and very wide blobs of small height stay small.
  void ReSetAndReFilterBlobs();

  // Deletes noise blobs from all lists where not owned by a ColPartition.
  void DeleteUnownedNoise();

  // Computes and stores the edge offsets on each blob for use in feature
  // extraction, using greyscale if the supplied grey and thresholds pixes
  // are 8-bit or otherwise (if NULL or not 8 bit) the original binary
  // edge step outlines.
  // Thresholds must either be the same size as grey or an integer down-scale
  // of grey.
  // See coutln.h for an explanation of edge offsets.
  void ComputeEdgeOffsets(Pix* thresholds, Pix* grey);

#ifndef GRAPHICS_DISABLED
  // Draw the noise blobs from all lists in red.
  void plot_noise_blobs(ScrollView* to_win);
  // Draw the blobs on the various lists in the block in different colors.
  void plot_graded_blobs(ScrollView* to_win);
#endif

  BLOBNBOX_LIST blobs;        // medium size
  BLOBNBOX_LIST underlines;   // underline blobs
  BLOBNBOX_LIST noise_blobs;  // very small
  BLOBNBOX_LIST small_blobs;  // fairly small
  BLOBNBOX_LIST large_blobs;  // big blobs
  BLOCK *block;               // real block
  PITCH_TYPE pitch_decision;  // how strong is decision
  float line_spacing;         // estimate
  // line_size is a lower-bound estimate of the font size in pixels of
  // the text in the block (with ascenders and descenders), being a small
  // (1.25) multiple of the median height of filtered blobs.
  // In most cases the font size will be bigger, but it will be closer
  // if the text is allcaps, or in a no-x-height script.
  float line_size;            // estimate
  float max_blob_size;        // line assignment limit
  float baseline_offset;      // phase shift
  float xheight;              // median blob size
  float fixed_pitch;          // pitch or 0
  float kern_size;            // average non-space
  float space_size;           // average space
  inT32 min_space;            // min definite space
  inT32 max_nonspace;         // max definite
  float fp_space;             // sp if fixed pitch
  float fp_nonsp;             // nonsp if fixed pitch
  float pr_space;             // sp if prop
  float pr_nonsp;             // non sp if prop
  TO_ROW *key_row;            // starting row

 private:
  TO_ROW_LIST row_list;  // temporary rows
};
|
||||
|
||||
ELISTIZEH(TO_BLOCK)

// Tunable weighting for fit error in TO_ROW credibility (see kErrorWeight).
extern double_VAR_H(textord_error_weight, 3,
                    "Weighting for error in believability");

// Free functions operating on C_BLOBs/BLOBNBOX lists during textord.
void find_cblob_limits(    // get y limits
    C_BLOB *blob,          // blob to search
    float leftx,           // x limits
    float rightx,
    FCOORD rotation,       // for landscape
    float &ymin,           // output y limits
    float &ymax);
void find_cblob_vlimits(   // get y limits
    C_BLOB *blob,          // blob to search
    float leftx,           // x limits
    float rightx,
    float &ymin,           // output y limits
    float &ymax);
void find_cblob_hlimits(   // get x limits
    C_BLOB *blob,          // blob to search
    float bottomy,         // y limits
    float topy,
    float &xmin,           // output x limits
    float &xymax);         // NOTE(review): historical name; presumably the x max — confirm
C_BLOB *crotate_cblob(     // rotate it
    C_BLOB *blob,          // blob to search
    FCOORD rotation        // for landscape
                      );
TBOX box_next(             // get bounding box
    BLOBNBOX_IT *it        // iterator to blobs
              );
TBOX box_next_pre_chopped(  // get bounding box
    BLOBNBOX_IT *it         // iterator to blobs
                          );
void vertical_cblob_projection(  // project outlines
    C_BLOB *blob,                // blob to project
    STATS *stats                 // output
                               );
void vertical_coutline_projection(  // project outlines
    C_OUTLINE *outline,             // outline to project
    STATS *stats                    // output
                                  );
#ifndef GRAPHICS_DISABLED
void plot_blob_list(ScrollView* win,                  // window to draw in
                    BLOBNBOX_LIST *list,              // blob list
                    ScrollView::Color body_colour,    // colour to draw
                    ScrollView::Color child_colour);  // colour of child
#endif  // GRAPHICS_DISABLED
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,449 @@
|
|||
/* -*-C-*-
|
||||
********************************************************************************
|
||||
*
|
||||
* File: blobs.h (Formerly blobs.h)
|
||||
* Description: Blob definition
|
||||
* Author: Mark Seaman, OCR Technology
|
||||
* Created: Fri Oct 27 15:39:52 1989
|
||||
* Modified: Thu Mar 28 15:33:38 1991 (Mark Seaman) marks@hpgrlt
|
||||
* Language: C
|
||||
* Package: N/A
|
||||
* Status: Experimental (Do Not Distribute)
|
||||
*
|
||||
* (c) Copyright 1989, Hewlett-Packard Company.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
*********************************************************************************/
|
||||
|
||||
#ifndef BLOBS_H
|
||||
#define BLOBS_H
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
I n c l u d e s
|
||||
----------------------------------------------------------------------*/
|
||||
#include "clst.h"
|
||||
#include "normalis.h"
|
||||
#include "publictypes.h"
|
||||
#include "rect.h"
|
||||
#include "vecfuncs.h"
|
||||
|
||||
class BLOCK;
|
||||
class C_BLOB;
|
||||
class C_OUTLINE;
|
||||
class LLSQ;
|
||||
class ROW;
|
||||
class WERD;
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
T y p e s
|
||||
----------------------------------------------------------------------*/
|
||||
#define EDGEPTFLAGS 4 /*concavity,length etc. */
|
||||
|
||||
// Integer point (also used as a 2-D vector) in blob coordinates.
// Coordinates are 16-bit; arithmetic below narrows back to inT16.
struct TPOINT {
  TPOINT(): x(0), y(0) {}
  TPOINT(inT16 vx, inT16 vy) : x(vx), y(vy) {}
  // Implicit conversion from the library's ICOORD point type.
  TPOINT(const ICOORD &ic) : x(ic.x()), y(ic.y()) {}

  // Component-wise translation by another point.
  void operator+=(const TPOINT& other) {
    x += other.x;
    y += other.y;
  }
  // Component-wise (truncating) integer division.
  void operator/=(int divisor) {
    x /= divisor;
    y /= divisor;
  }
  // Exact equality of both coordinates.
  bool operator==(const TPOINT& other) const {
    return x == other.x && y == other.y;
  }
  // Returns true when the two line segments cross each other.
  // (Moved from outlines.cpp).
  static bool IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0,
                        const TPOINT& b1);

  inT16 x;  // absolute x coord.
  inT16 y;  // absolute y coord.
};
typedef TPOINT VECTOR;  // structure for coordinates.
|
||||
|
||||
// A point on the polygonal approximation of a blob outline.
// EDGEPTs are linked into a circular doubly-linked list via next/prev.
// When src_outline is non-NULL, start_step/step_count record which step
// range of the original C_OUTLINE this point approximates, enabling
// higher-resolution feature extraction than the polygonal approximation.
struct EDGEPT {
  EDGEPT()
    : next(NULL), prev(NULL), src_outline(NULL), start_step(0), step_count(0) {
    memset(flags, 0, EDGEPTFLAGS * sizeof(flags[0]));
  }
  // Copy constructor copies the data but deliberately leaves the new point
  // unlinked (next/prev NULL): list links belong to the containing list.
  EDGEPT(const EDGEPT& src) : next(NULL), prev(NULL) {
    CopyFrom(src);
  }
  // Assignment likewise preserves this point's existing links.
  EDGEPT& operator=(const EDGEPT& src) {
    CopyFrom(src);
    return *this;
  }
  // Copies the data elements, but leaves the pointers untouched.
  void CopyFrom(const EDGEPT& src) {
    pos = src.pos;
    vec = src.vec;
    memcpy(flags, src.flags, EDGEPTFLAGS * sizeof(flags[0]));
    src_outline = src.src_outline;
    start_step = src.start_step;
    step_count = src.step_count;
  }
  // Returns the squared distance between the points, with the x-component
  // weighted by x_factor.
  int WeightedDistance(const EDGEPT& other, int x_factor) const {
    int x_dist = pos.x - other.pos.x;
    int y_dist = pos.y - other.pos.y;
    return x_dist * x_dist * x_factor + y_dist * y_dist;
  }
  // Returns true if the positions are equal.
  bool EqualPos(const EDGEPT& other) const { return pos == other.pos; }
  // Returns the bounding box of the outline segment from *this to *end.
  // Ignores hidden edge flags.
  TBOX SegmentBox(const EDGEPT* end) const {
    TBOX box(pos.x, pos.y, pos.x, pos.y);
    const EDGEPT* pt = this;
    do {
      pt = pt->next;
      if (pt->pos.x < box.left()) box.set_left(pt->pos.x);
      if (pt->pos.x > box.right()) box.set_right(pt->pos.x);
      if (pt->pos.y < box.bottom()) box.set_bottom(pt->pos.y);
      if (pt->pos.y > box.top()) box.set_top(pt->pos.y);
    } while (pt != end && pt != this);  // Stop at end or after a full circuit.
    return box;
  }
  // Returns the area of the outline segment from *this to *end.
  // Ignores hidden edge flags.
  int SegmentArea(const EDGEPT* end) const {
    int area = 0;
    const EDGEPT* pt = this->next;
    do {
      // Accumulate CROSS products of the vector from *this to pt with pt's
      // step vector; the sum gives the (signed) enclosed area measure.
      TPOINT origin_vec(pt->pos.x - pos.x, pt->pos.y - pos.y);
      area += CROSS(origin_vec, pt->vec);
      pt = pt->next;
    } while (pt != end && pt != this);
    return area;
  }
  // Returns true if the number of points in the outline segment from *this to
  // *end is less that min_points and false if we get back to *this first.
  // Ignores hidden edge flags.
  bool ShortNonCircularSegment(int min_points, const EDGEPT* end) const {
    int count = 0;
    const EDGEPT* pt = this;
    do {
      if (pt == end) return true;
      pt = pt->next;
      ++count;
    } while (pt != this && count <= min_points);
    return false;
  }

  // Accessors to hide or reveal a cut edge from feature extractors.
  void Hide() {
    flags[0] = true;
  }
  void Reveal() {
    flags[0] = false;
  }
  bool IsHidden() const {
    return flags[0] != 0;
  }
  // Marks this point as a chop point (flags[2]).
  void MarkChop() {
    flags[2] = true;
  }
  bool IsChopPt() const {
    return flags[2] != 0;
  }

  TPOINT pos;               // position
  VECTOR vec;               // vector to next point
  // TODO(rays) Remove flags and replace with
  // is_hidden, runlength, dir, and fixed. The only use
  // of the flags other than is_hidden is in polyaprx.cpp.
  char flags[EDGEPTFLAGS];  // concavity, length etc
  EDGEPT* next;             // anticlockwise element
  EDGEPT* prev;             // clockwise element
  C_OUTLINE* src_outline;   // Outline it came from.
  // The following fields are not used if src_outline is NULL.
  int start_step;           // Location of pos in src_outline.
  int step_count;           // Number of steps used (may wrap around).
};

// For use in chop and findseam to keep a list of which EDGEPTs were inserted.
CLISTIZEH(EDGEPT);
|
||||
|
||||
// A single closed outline of a blob: a circular loop of EDGEPTs plus a
// cached axis-aligned bounding box (topleft/botright). TESSLINEs chain
// into a singly-linked list via next; an outline may be a hole (child)
// inside another outline (is_hole).
struct TESSLINE {
  TESSLINE() : is_hole(false), loop(NULL), next(NULL) {}
  // Copy constructor deep-copies via CopyFrom; next is left NULL.
  TESSLINE(const TESSLINE& src) : loop(NULL), next(NULL) {
    CopyFrom(src);
  }
  // Destructor releases the owned edge loop via Clear().
  ~TESSLINE() {
    Clear();
  }
  TESSLINE& operator=(const TESSLINE& src) {
    CopyFrom(src);
    return *this;
  }
  // Consume the circular list of EDGEPTs to make a TESSLINE.
  static TESSLINE* BuildFromOutlineList(EDGEPT* outline);
  // Copies the data and the outline, but leaves next untouched.
  void CopyFrom(const TESSLINE& src);
  // Deletes owned data.
  void Clear();
  // Normalize in-place using the DENORM.
  void Normalize(const DENORM& denorm);
  // Rotates by the given rotation in place.
  void Rotate(const FCOORD rotation);
  // Moves by the given vec in place.
  void Move(const ICOORD vec);
  // Scales by the given factor in place.
  void Scale(float factor);
  // Sets up the start and vec members of the loop from the pos members.
  void SetupFromPos();
  // Recomputes the bounding box from the points in the loop.
  void ComputeBoundingBox();
  // Computes the min and max cross product of the outline points with the
  // given vec and returns the results in min_xp and max_xp. Geometrically
  // this is the left and right edge of the outline perpendicular to the
  // given direction, but to get the distance units correct, you would
  // have to divide by the modulus of vec.
  void MinMaxCrossProduct(const TPOINT vec, int* min_xp, int* max_xp) const;

  TBOX bounding_box() const;
  // Returns true if *this and other have equal bounding boxes.
  bool SameBox(const TESSLINE& other) const {
    return topleft == other.topleft && botright == other.botright;
  }
  // Returns true if the given line segment crosses any outline of this blob.
  // Edges are only tested when BOTH endpoints lie inside the bounding box;
  // otherwise false is returned without examining the loop.
  bool SegmentCrosses(const TPOINT& pt1, const TPOINT& pt2) const {
    if (Contains(pt1) && Contains(pt2)) {
      EDGEPT* pt = loop;
      do {
        if (TPOINT::IsCrossed(pt1, pt2, pt->pos, pt->next->pos)) return true;
        pt = pt->next;
      } while (pt != loop);
    }
    return false;
  }
  // Returns true if the point is contained within the outline box.
  // (Box containment only, not containment in the polygon itself.)
  bool Contains(const TPOINT& pt) const {
    return topleft.x <= pt.x && pt.x <= botright.x &&
           botright.y <= pt.y && pt.y <= topleft.y;
  }

#ifndef GRAPHICS_DISABLED
  void plot(ScrollView* window, ScrollView::Color color,
            ScrollView::Color child_color);
#endif  // GRAPHICS_DISABLED

  // Returns the first outline point that has a different src_outline to its
  // predecessor, or, if all the same, the lowest indexed point.
  EDGEPT* FindBestStartPt() const;

  // Area of the bounding box (width * height), not of the polygon itself.
  int BBArea() const {
    return (botright.x - topleft.x) * (topleft.y - botright.y);
  }

  TPOINT topleft;   // Top left of loop.
  TPOINT botright;  // Bottom right of loop.
  TPOINT start;     // Start of loop.
  bool is_hole;     // True if this is a hole/child outline.
  EDGEPT *loop;     // Edgeloop.
  TESSLINE *next;   // Next outline in blob.
};                  // Outline structure.
|
||||
|
||||
// A blob: an owned, singly-linked list of TESSLINE outlines plus the
// DENORM recording the normalizations applied to it so far.
struct TBLOB {
  TBLOB() : outlines(NULL) {}
  // Copy constructor deep-copies outlines via CopyFrom.
  TBLOB(const TBLOB& src) : outlines(NULL) {
    CopyFrom(src);
  }
  // Destructor releases the owned outline list via Clear().
  ~TBLOB() {
    Clear();
  }
  TBLOB& operator=(const TBLOB& src) {
    CopyFrom(src);
    return *this;
  }
  // Factory to build a TBLOB from a C_BLOB with polygonal approximation along
  // the way. If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB
  // contain pointers to the input C_OUTLINEs that enable higher-resolution
  // feature extraction that does not use the polygonal approximation.
  static TBLOB* PolygonalCopy(bool allow_detailed_fx, C_BLOB* src);
  // Factory builds a blob with no outlines, but copies the other member data.
  static TBLOB* ShallowCopy(const TBLOB& src);
  // Normalizes the blob for classification only if needed.
  // (Normally this means a non-zero classify rotation.)
  // If no Normalization is needed, then NULL is returned, and the input blob
  // can be used directly. Otherwise a new TBLOB is returned which must be
  // deleted after use.
  TBLOB* ClassifyNormalizeIfNeeded() const;

  // Copies the data and the outlines, but leaves next untouched.
  void CopyFrom(const TBLOB& src);
  // Deletes owned data.
  void Clear();
  // Sets up the built-in DENORM and normalizes the blob in-place.
  // For parameters see DENORM::SetupNormalization, plus the inverse flag for
  // this blob and the Pix for the full image.
  void Normalize(const BLOCK* block,
                 const FCOORD* rotation,
                 const DENORM* predecessor,
                 float x_origin, float y_origin,
                 float x_scale, float y_scale,
                 float final_xshift, float final_yshift,
                 bool inverse, Pix* pix);
  // Rotates by the given rotation in place.
  void Rotate(const FCOORD rotation);
  // Moves by the given vec in place.
  void Move(const ICOORD vec);
  // Scales by the given factor in place.
  void Scale(float factor);
  // Recomputes the bounding boxes of the outlines.
  void ComputeBoundingBoxes();

  // Returns the number of outlines.
  int NumOutlines() const;

  TBOX bounding_box() const;

  // Returns true if the given line segment crosses any outline of this blob.
  bool SegmentCrossesOutline(const TPOINT& pt1, const TPOINT& pt2) const {
    for (const TESSLINE* outline = outlines; outline != NULL;
         outline = outline->next) {
      if (outline->SegmentCrosses(pt1, pt2)) return true;
    }
    return false;
  }
  // Returns true if the point is contained within any of the outline boxes.
  bool Contains(const TPOINT& pt) const {
    for (const TESSLINE* outline = outlines; outline != NULL;
         outline = outline->next) {
      if (outline->Contains(pt)) return true;
    }
    return false;
  }

  // Finds and deletes any duplicate outlines in this blob, without deleting
  // their EDGEPTs.
  void EliminateDuplicateOutlines();

  // Swaps the outlines of *this and next if needed to keep the centers in
  // increasing x.
  void CorrectBlobOrder(TBLOB* next);

  // Read-only access to the accumulated normalization transform.
  const DENORM& denorm() const {
    return denorm_;
  }

#ifndef GRAPHICS_DISABLED
  void plot(ScrollView* window, ScrollView::Color color,
            ScrollView::Color child_color);
#endif  // GRAPHICS_DISABLED

  // Sum of the outlines' bounding-box areas (not the union's area).
  int BBArea() const {
    int total_area = 0;
    for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next)
      total_area += outline->BBArea();
    return total_area;
  }

  // Computes the center of mass and second moments for the old baseline and
  // 2nd moment normalizations. Returns the outline length.
  // The input denorm should be the normalizations that have been applied from
  // the image to the current state of this TBLOB.
  int ComputeMoments(FCOORD* center, FCOORD* second_moments) const;
  // Computes the precise bounding box of the coords that are generated by
  // GetEdgeCoords. This may be different from the bounding box of the polygon.
  void GetPreciseBoundingBox(TBOX* precise_box) const;
  // Adds edges to the given vectors.
  // For all the edge steps in all the outlines, or polygonal approximation
  // where there are no edge steps, collects the steps into x_coords/y_coords.
  // x_coords is a collection of the x-coords of vertical edges for each
  // y-coord starting at box.bottom().
  // y_coords is a collection of the y-coords of horizontal edges for each
  // x-coord starting at box.left().
  // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
  // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
  void GetEdgeCoords(const TBOX& box,
                     GenericVector<GenericVector<int> >* x_coords,
                     GenericVector<GenericVector<int> >* y_coords) const;

  TESSLINE *outlines;  // List of outlines in blob.

 private:  // TODO(rays) Someday the data members will be private too.
  // For all the edge steps in all the outlines, or polygonal approximation
  // where there are no edge steps, collects the steps into the bounding_box,
  // llsq and/or the x_coords/y_coords. Both are used in different kinds of
  // normalization.
  // For a description of x_coords, y_coords, see GetEdgeCoords above.
  void CollectEdges(const TBOX& box,
                    TBOX* bounding_box, LLSQ* llsq,
                    GenericVector<GenericVector<int> >* x_coords,
                    GenericVector<GenericVector<int> >* y_coords) const;

 private:
  // DENORM indicating the transformations that this blob has undergone so far.
  DENORM denorm_;
};  // Blob structure.
|
||||
|
||||
// A word as a vector of owned TBLOB pointers.
struct TWERD {
  TWERD() : latin_script(false) {}
  // Copy constructor deep-copies the blobs via CopyFrom.
  TWERD(const TWERD& src) {
    CopyFrom(src);
  }
  // Destructor releases the owned blobs via Clear().
  ~TWERD() {
    Clear();
  }
  TWERD& operator=(const TWERD& src) {
    CopyFrom(src);
    return *this;
  }
  // Factory to build a TWERD from a (C_BLOB) WERD, with polygonal
  // approximation along the way.
  static TWERD* PolygonalCopy(bool allow_detailed_fx, WERD* src);
  // Baseline normalizes the blobs in-place, recording the normalization in the
  // DENORMs in the blobs.
  void BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, bool inverse,
                   float x_height, float baseline_shift, bool numeric_mode,
                   tesseract::OcrEngineMode hint,
                   const TBOX* norm_box,
                   DENORM* word_denorm);
  // Copies the data and the blobs, but leaves next untouched.
  void CopyFrom(const TWERD& src);
  // Deletes owned data.
  void Clear();
  // Recomputes the bounding boxes of the blobs.
  void ComputeBoundingBoxes();

  // Returns the number of blobs in the word.
  int NumBlobs() const {
    return blobs.size();
  }
  TBOX bounding_box() const;

  // Merges the blobs from start to end, not including end, and deletes
  // the blobs between start and end.
  void MergeBlobs(int start, int end);

  void plot(ScrollView* window);

  GenericVector<TBLOB*> blobs;  // Blobs in word.
  bool latin_script;            // This word is in a latin-based script.
};
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
----------------------------------------------------------------------*/
|
||||
// TODO(rays) Make divisible_blob and divide_blobs members of TBLOB.
|
||||
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location);
|
||||
|
||||
void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
|
||||
const TPOINT& location);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,71 @@
|
|||
/**********************************************************************
|
||||
* File: blread.cpp (Formerly pdread.c)
|
||||
* Description: Friend function of BLOCK to read the uscan pd file.
|
||||
* Author: Ray Smith
|
||||
* Created: Mon Mar 18 14:39:00 GMT 1991
|
||||
*
|
||||
* (C) Copyright 1991, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdlib.h>
|
||||
#ifdef __UNIX__
|
||||
#include <assert.h>
|
||||
#endif
|
||||
#include "scanutils.h"
|
||||
#include "fileerr.h"
|
||||
#include "blread.h"
|
||||
|
||||
#define UNLV_EXT ".uzn" // unlv zone file
|
||||
|
||||
/**********************************************************************
 * read_unlv_file
 *
 * Read a whole unlv zone file to make a list of blocks.
 * Returns false only if the file could not be opened; a file with no
 * parseable zones still yields true (and an unchanged list).
 **********************************************************************/

bool read_unlv_file(                    //print list of sides
                    STRING name,        //basename of file
                    inT32 xsize,        //image size (xsize is not used here)
                    inT32 ysize,        //image size
                    BLOCK_LIST *blocks  //output list
                   ) {
  FILE *pdfp;                  //file pointer
  BLOCK *block;                //current block
  int x;                       //current top-down coords
  int y;
  int width;                   //of current block
  int height;
  BLOCK_IT block_it = blocks;  //block iterator

  name += UNLV_EXT;            //add extension
  if ((pdfp = fopen (name.string (), "rb")) == NULL) {
    return false;              //didn't read one
  } else {
    // Each zone line is "x y width height <skipped token>" with top-down
    // y-coords; ysize - y flips them to the bottom-up coords BLOCK expects.
    while (tfscanf(pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) {
      //make rect block
      block = new BLOCK (name.string (), TRUE, 0, 0,
                         (inT16) x, (inT16) (ysize - y - height),
                         (inT16) (x + width), (inT16) (ysize - y));
      //on end of list
      block_it.add_to_end (block);
    }
    fclose(pdfp);
  }
  return true;
}
|
||||
|
||||
void FullPageBlock(int width, int height, BLOCK_LIST *blocks) {
|
||||
BLOCK_IT block_it(blocks);
|
||||
BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
|
||||
block_it.add_to_end(block);
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
/**********************************************************************
|
||||
* File: blread.h (Formerly pdread.h)
|
||||
* Description: Friend function of BLOCK to read the uscan pd file.
|
||||
* Author: Ray Smith
|
||||
* Created: Mon Mar 18 14:39:00 GMT 1991
|
||||
*
|
||||
* (C) Copyright 1991, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef BLREAD_H
|
||||
#define BLREAD_H
|
||||
|
||||
#include "params.h"
|
||||
#include "ocrblock.h"
|
||||
|
||||
bool read_unlv_file( //print list of sides
|
||||
STRING name, //basename of file
|
||||
inT32 xsize, //image size
|
||||
inT32 ysize, //image size
|
||||
BLOCK_LIST *blocks //output list
|
||||
);
|
||||
void FullPageBlock(int width, int height, BLOCK_LIST *blocks);
|
||||
#endif
|
|
@ -0,0 +1,235 @@
|
|||
/**********************************************************************
|
||||
* File: boxread.cpp
|
||||
* Description: Read data from a box file.
|
||||
* Author: Ray Smith
|
||||
* Created: Fri Aug 24 17:47:23 PDT 2007
|
||||
*
|
||||
* (C) Copyright 2007, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "boxread.h"
|
||||
#include <string.h>
|
||||
|
||||
#include "fileerr.h"
|
||||
#include "rect.h"
|
||||
#include "strngs.h"
|
||||
#include "tprintf.h"
|
||||
#include "unichar.h"
|
||||
|
||||
// Special char code used to identify multi-blob labels.
|
||||
static const char* kMultiBlobLabelCode = "WordStr";
|
||||
|
||||
// Open the boxfile based on the given image filename.
|
||||
FILE* OpenBoxFile(const STRING& fname) {
|
||||
STRING filename = BoxFileName(fname);
|
||||
FILE* box_file = NULL;
|
||||
if (!(box_file = fopen(filename.string(), "rb"))) {
|
||||
CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s",
|
||||
filename.string());
|
||||
}
|
||||
return box_file;
|
||||
}
|
||||
|
||||
// Reads all boxes from the given filename.
|
||||
// Reads a specific target_page number if >= 0, or all pages otherwise.
|
||||
// Skips blanks if skip_blanks is true.
|
||||
// The UTF-8 label of the box is put in texts, and the full box definition as
|
||||
// a string is put in box_texts, with the corresponding page number in pages.
|
||||
// Each of the output vectors is optional (may be NULL).
|
||||
// Returns false if no boxes are found.
|
||||
bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename,
|
||||
GenericVector<TBOX>* boxes,
|
||||
GenericVector<STRING>* texts,
|
||||
GenericVector<STRING>* box_texts,
|
||||
GenericVector<int>* pages) {
|
||||
GenericVector<char> box_data;
|
||||
if (!tesseract::LoadDataFromFile(BoxFileName(filename), &box_data))
|
||||
return false;
|
||||
// Convert the array of bytes to a string, so it can be used by the parser.
|
||||
box_data.push_back('\0');
|
||||
return ReadMemBoxes(target_page, skip_blanks, &box_data[0], boxes, texts,
|
||||
box_texts, pages);
|
||||
}
|
||||
|
||||
// Reads all boxes from the string. Otherwise, as ReadAllBoxes.
// Splits box_data on newlines and parses each line independently;
// unparseable lines are skipped silently. Returns true if at least
// one box was accepted.
bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data,
                  GenericVector<TBOX>* boxes,
                  GenericVector<STRING>* texts,
                  GenericVector<STRING>* box_texts,
                  GenericVector<int>* pages) {
  STRING box_str(box_data);
  GenericVector<STRING> lines;
  box_str.split('\n', &lines);
  if (lines.empty()) return false;
  int num_boxes = 0;
  for (int i = 0; i < lines.size(); ++i) {
    int page = 0;
    STRING utf8_str;
    TBOX box;
    if (!ParseBoxFileStr(lines[i].string(), &page, &utf8_str, &box)) {
      continue;  // Malformed line: skip without reporting.
    }
    if (skip_blanks && (utf8_str == " " || utf8_str == "\t")) continue;
    if (target_page >= 0 && page != target_page) continue;
    if (boxes != NULL) boxes->push_back(box);
    if (texts != NULL) texts->push_back(utf8_str);
    if (box_texts != NULL) {
      STRING full_text;
      // NOTE(review): the regenerated box string is written with target_page
      // rather than the line's own page; when reading all pages
      // (target_page == -1) this records -1 -- confirm this is intentional.
      MakeBoxFileStr(utf8_str.string(), box, target_page, &full_text);
      box_texts->push_back(full_text);
    }
    if (pages != NULL) pages->push_back(page);
    ++num_boxes;
  }
  return num_boxes > 0;
}
|
||||
|
||||
// Returns the box file name corresponding to the given image_filename.
|
||||
STRING BoxFileName(const STRING& image_filename) {
|
||||
STRING box_filename = image_filename;
|
||||
const char *lastdot = strrchr(box_filename.string(), '.');
|
||||
if (lastdot != NULL)
|
||||
box_filename.truncate_at(lastdot - box_filename.string());
|
||||
|
||||
box_filename += ".box";
|
||||
return box_filename;
|
||||
}
|
||||
|
||||
// TODO(rays) convert all uses of ReadNextBox to use the new ReadAllBoxes.
|
||||
// Box files are used ONLY DURING TRAINING, but by both processes of
|
||||
// creating tr files with tesseract, and unicharset_extractor.
|
||||
// ReadNextBox factors out the code to interpret a line of a box
|
||||
// file so that applybox and unicharset_extractor interpret the same way.
|
||||
// This function returns the next valid box file utf8 string and coords
|
||||
// and returns true, or false on eof (and closes the file).
|
||||
// It ignores the utf8 file signature ByteOrderMark (U+FEFF=EF BB BF), checks
|
||||
// for valid utf-8 and allows space or tab between fields.
|
||||
// utf8_str is set with the unichar string, and bounding box with the box.
|
||||
// If there are page numbers in the file, it reads them all.
|
||||
bool ReadNextBox(int *line_number, FILE* box_file,
|
||||
STRING* utf8_str, TBOX* bounding_box) {
|
||||
return ReadNextBox(-1, line_number, box_file, utf8_str, bounding_box);
|
||||
}
|
||||
|
||||
// As ReadNextBox above, but get a specific page number. (0-based)
|
||||
// Use -1 to read any page number. Files without page number all
|
||||
// read as if they are page 0.
|
||||
bool ReadNextBox(int target_page, int *line_number, FILE* box_file,
|
||||
STRING* utf8_str, TBOX* bounding_box) {
|
||||
int page = 0;
|
||||
char buff[kBoxReadBufSize]; // boxfile read buffer
|
||||
char *buffptr = buff;
|
||||
|
||||
while (fgets(buff, sizeof(buff) - 1, box_file)) {
|
||||
(*line_number)++;
|
||||
|
||||
buffptr = buff;
|
||||
const unsigned char *ubuf = reinterpret_cast<const unsigned char*>(buffptr);
|
||||
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf)
|
||||
buffptr += 3; // Skip unicode file designation.
|
||||
// Check for blank lines in box file
|
||||
if (*buffptr == '\n' || *buffptr == '\0') continue;
|
||||
// Skip blank boxes.
|
||||
if (*buffptr == ' ' || *buffptr == '\t') continue;
|
||||
if (*buffptr != '\0') {
|
||||
if (!ParseBoxFileStr(buffptr, &page, utf8_str, bounding_box)) {
|
||||
tprintf("Box file format error on line %i; ignored\n", *line_number);
|
||||
continue;
|
||||
}
|
||||
if (target_page >= 0 && target_page != page)
|
||||
continue; // Not on the appropriate page.
|
||||
return true; // Successfully read a box.
|
||||
}
|
||||
}
|
||||
fclose(box_file);
|
||||
return false; // EOF
|
||||
}
|
||||
|
||||
// Parses the given box file string into a page_number, utf8_str, and
|
||||
// bounding_box. Returns true on a successful parse.
|
||||
// The box file is assumed to contain box definitions, one per line, of the
|
||||
// following format for blob-level boxes:
|
||||
// <UTF8 str> <left> <bottom> <right> <top> <page id>
|
||||
// and for word/line-level boxes:
|
||||
// WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
|
||||
// See applyybox.cpp for more information.
|
||||
bool ParseBoxFileStr(const char* boxfile_str, int* page_number,
|
||||
STRING* utf8_str, TBOX* bounding_box) {
|
||||
*bounding_box = TBOX(); // Initialize it to empty.
|
||||
*utf8_str = "";
|
||||
char uch[kBoxReadBufSize];
|
||||
const char *buffptr = boxfile_str;
|
||||
// Read the unichar without messing up on Tibetan.
|
||||
// According to issue 253 the utf-8 surrogates 85 and A0 are treated
|
||||
// as whitespace by sscanf, so it is more reliable to just find
|
||||
// ascii space and tab.
|
||||
int uch_len = 0;
|
||||
// Skip unicode file designation, if present.
|
||||
const unsigned char *ubuf = reinterpret_cast<const unsigned char*>(buffptr);
|
||||
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf)
|
||||
buffptr += 3;
|
||||
// Allow a single blank as the UTF-8 string. Check for empty string and
|
||||
// then blindly eat the first character.
|
||||
if (*buffptr == '\0') return false;
|
||||
do {
|
||||
uch[uch_len++] = *buffptr++;
|
||||
} while (*buffptr != '\0' && *buffptr != ' ' && *buffptr != '\t' &&
|
||||
uch_len < kBoxReadBufSize - 1);
|
||||
uch[uch_len] = '\0';
|
||||
if (*buffptr != '\0') ++buffptr;
|
||||
int x_min, y_min, x_max, y_max;
|
||||
*page_number = 0;
|
||||
int count = sscanf(buffptr, "%d %d %d %d %d",
|
||||
&x_min, &y_min, &x_max, &y_max, page_number);
|
||||
if (count != 5 && count != 4) {
|
||||
tprintf("Bad box coordinates in boxfile string! %s\n", ubuf);
|
||||
return false;
|
||||
}
|
||||
// Test for long space-delimited string label.
|
||||
if (strcmp(uch, kMultiBlobLabelCode) == 0 &&
|
||||
(buffptr = strchr(buffptr, '#')) != NULL) {
|
||||
strncpy(uch, buffptr + 1, kBoxReadBufSize - 1);
|
||||
uch[kBoxReadBufSize - 1] = '\0'; // Prevent buffer overrun.
|
||||
chomp_string(uch);
|
||||
uch_len = strlen(uch);
|
||||
}
|
||||
// Validate UTF8 by making unichars with it.
|
||||
int used = 0;
|
||||
while (used < uch_len) {
|
||||
UNICHAR ch(uch + used, uch_len - used);
|
||||
int new_used = ch.utf8_len();
|
||||
if (new_used == 0) {
|
||||
tprintf("Bad UTF-8 str %s starts with 0x%02x at col %d\n",
|
||||
uch + used, uch[used], used + 1);
|
||||
return false;
|
||||
}
|
||||
used += new_used;
|
||||
}
|
||||
*utf8_str = uch;
|
||||
if (x_min > x_max) Swap(&x_min, &x_max);
|
||||
if (y_min > y_max) Swap(&y_min, &y_max);
|
||||
bounding_box->set_to_given_coords(x_min, y_min, x_max, y_max);
|
||||
return true; // Successfully read a box.
|
||||
}
|
||||
|
||||
// Creates a box file string from a unichar string, TBOX and page number.
|
||||
void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num,
|
||||
STRING* box_str) {
|
||||
*box_str = unichar_str;
|
||||
box_str->add_str_int(" ", box.left());
|
||||
box_str->add_str_int(" ", box.bottom());
|
||||
box_str->add_str_int(" ", box.right());
|
||||
box_str->add_str_int(" ", box.top());
|
||||
box_str->add_str_int(" ", page_num);
|
||||
}
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
/**********************************************************************
 * File:        boxread.h
 * Description: Read data from a box file.
 * Author:      Ray Smith
 * Created:     Fri Aug 24 17:47:23 PDT 2007
 *
 * (C) Copyright 2007, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#ifndef TESSERACT_CCUTIL_BOXREAD_H__
#define TESSERACT_CCUTIL_BOXREAD_H__

#include <stdio.h>
#include "genericvector.h"
#include "strngs.h"

class STRING;
class TBOX;

// Size of buffer used to read a line from a box file.
const int kBoxReadBufSize = 1024;

// Open the boxfile based on the given image filename.
// Returns NULL if the box file cannot be opened.
FILE* OpenBoxFile(const STRING& fname);

// Reads all boxes from the given filename.
// Reads a specific target_page number if >= 0, or all pages otherwise.
// Skips blanks if skip_blanks is true.
// The UTF-8 label of the box is put in texts, and the full box definition as
// a string is put in box_texts, with the corresponding page number in pages.
// Each of the output vectors is optional (may be NULL).
// Returns false if no boxes are found.
bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename,
                  GenericVector<TBOX>* boxes,
                  GenericVector<STRING>* texts,
                  GenericVector<STRING>* box_texts,
                  GenericVector<int>* pages);

// Reads all boxes from the string. Otherwise, as ReadAllBoxes.
bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data,
                  GenericVector<TBOX>* boxes,
                  GenericVector<STRING>* texts,
                  GenericVector<STRING>* box_texts,
                  GenericVector<int>* pages);

// Returns the box file name corresponding to the given image_filename.
STRING BoxFileName(const STRING& image_filename);

// ReadNextBox factors out the code to interpret a line of a box
// file so that applybox and unicharset_extractor interpret the same way.
// This function returns the next valid box file utf8 string and coords
// and returns true, or false on eof (and closes the file).
// It ignores the utf8 file signature ByteOrderMark (U+FEFF=EF BB BF), checks
// for valid utf-8 and allows space or tab between fields.
// utf8_str is set with the unichar string, and bounding box with the box.
// If there are page numbers in the file, it reads them all.
bool ReadNextBox(int *line_number, FILE* box_file,
                 STRING* utf8_str, TBOX* bounding_box);
// As ReadNextBox above, but get a specific page number. (0-based)
// Use -1 to read any page number. Files without page number all
// read as if they are page 0.
bool ReadNextBox(int target_page, int *line_number, FILE* box_file,
                 STRING* utf8_str, TBOX* bounding_box);

// Parses the given box file string into a page_number, utf8_str, and
// bounding_box. Returns true on a successful parse.
bool ParseBoxFileStr(const char* boxfile_str, int* page_number,
                     STRING* utf8_str, TBOX* bounding_box);

// Creates a box file string from a unichar string, TBOX and page number.
void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num,
                    STRING* box_str);

#endif  // TESSERACT_CCUTIL_BOXREAD_H__
|
|
@ -0,0 +1,203 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File:        boxword.cpp
|
||||
// Description: Class to represent the bounding boxes of the output.
|
||||
// Author: Ray Smith
|
||||
// Created: Tue May 25 14:18:14 PDT 2010
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "blobs.h"
|
||||
#include "boxword.h"
|
||||
#include "normalis.h"
|
||||
#include "ocrblock.h"
|
||||
#include "pageres.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Clip output boxes to input blob boxes for bounds that are within this
// tolerance (in pixels). Otherwise, the blob may be chopped and we have to
// just use the word bounding box.
const int kBoxClipTolerance = 2;
|
||||
|
||||
// Default constructor: an empty word with no boxes.
BoxWord::BoxWord() : length_(0) {
}

// Copy constructor: deep-copies the box array from src.
BoxWord::BoxWord(const BoxWord& src) {
  CopyFrom(src);
}

BoxWord::~BoxWord() {
}

// Assignment operator: deep-copies the box array from src.
BoxWord& BoxWord::operator=(const BoxWord& src) {
  // Self-assignment guard: CopyFrom clears boxes_ before copying, so
  // copying from *this would read from the freshly-cleared vector.
  if (this != &src)
    CopyFrom(src);
  return *this;
}
|
||||
|
||||
void BoxWord::CopyFrom(const BoxWord& src) {
|
||||
bbox_ = src.bbox_;
|
||||
length_ = src.length_;
|
||||
boxes_.clear();
|
||||
boxes_.reserve(length_);
|
||||
for (int i = 0; i < length_; ++i)
|
||||
boxes_.push_back(src.boxes_[i]);
|
||||
}
|
||||
|
||||
// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
// switch back to original image coordinates.
// Returns a freshly new-ed BoxWord; presumably ownership transfers to the
// caller — confirm against call sites.
BoxWord* BoxWord::CopyFromNormalized(TWERD* tessword) {
  BoxWord* boxword = new BoxWord();
  // Count the blobs.
  boxword->length_ = tessword->NumBlobs();
  // Allocate memory.
  boxword->boxes_.reserve(boxword->length_);

  for (int b = 0; b < boxword->length_; ++b) {
    TBLOB* tblob = tessword->blobs[b];
    TBOX blob_box;
    // Union the denormalized positions of the blob's edge points into
    // blob_box, one outline at a time.
    for (TESSLINE* outline = tblob->outlines; outline != NULL;
         outline = outline->next) {
      EDGEPT* edgept = outline->loop;
      // Iterate over the (circular) list of edges.
      do {
        // Skip a point only if both it and its predecessor are hidden.
        if (!edgept->IsHidden() || !edgept->prev->IsHidden()) {
          ICOORD pos(edgept->pos.x, edgept->pos.y);
          TPOINT denormed;
          // Map the normalized coordinates back to image space.
          tblob->denorm().DenormTransform(NULL, edgept->pos, &denormed);
          pos.set_x(denormed.x);
          pos.set_y(denormed.y);
          // Grow the blob box by the single-point box at pos.
          TBOX pt_box(pos, pos);
          blob_box += pt_box;
        }
        edgept = edgept->next;
      } while (edgept != outline->loop);
    }
    boxword->boxes_.push_back(blob_box);
  }
  boxword->ComputeBoundingBox();
  return boxword;
}
|
||||
|
||||
// Clean up the bounding boxes from the polygonal approximation by
// expanding slightly, then clipping to the blobs from the original_word
// that overlap. If not null, the block provides the inverse rotation.
void BoxWord::ClipToOriginalWord(const BLOCK* block, WERD* original_word) {
  for (int i = 0; i < length_; ++i) {
    TBOX box = boxes_[i];
    // Expand by a single pixel, as the poly approximation error is 1 pixel.
    box = TBOX(box.left() - 1, box.bottom() - 1,
               box.right() + 1, box.top() + 1);
    // Now find the original box that matches: union of all original blob
    // boxes (rotated back to image space) that majorly overlap the
    // expanded box.
    TBOX original_box;
    C_BLOB_IT b_it(original_word->cblob_list());
    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
      TBOX blob_box = b_it.data()->bounding_box();
      if (block != NULL)
        blob_box.rotate(block->re_rotation());
      if (blob_box.major_overlap(box)) {
        original_box += blob_box;
      }
    }
    // Snap each side of box to the matching original side, but only when
    // the two are within kBoxClipTolerance pixels of each other.
    if (!original_box.null_box()) {
      if (NearlyEqual<int>(original_box.left(), box.left(), kBoxClipTolerance))
        box.set_left(original_box.left());
      if (NearlyEqual<int>(original_box.right(), box.right(),
                           kBoxClipTolerance))
        box.set_right(original_box.right());
      if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance))
        box.set_top(original_box.top());
      if (NearlyEqual<int>(original_box.bottom(), box.bottom(),
                           kBoxClipTolerance))
        box.set_bottom(original_box.bottom());
    }
    // Finally, clamp the result to the whole original word's bounding box
    // (also rotated back to image space).
    original_box = original_word->bounding_box();
    if (block != NULL)
      original_box.rotate(block->re_rotation());
    boxes_[i] = box.intersection(original_box);
  }
  ComputeBoundingBox();
}
|
||||
|
||||
// Merges the boxes from start to end, not including end, and deletes
|
||||
// the boxes between start and end.
|
||||
void BoxWord::MergeBoxes(int start, int end) {
|
||||
start = ClipToRange(start, 0, length_);
|
||||
end = ClipToRange(end, 0, length_);
|
||||
if (end <= start + 1)
|
||||
return;
|
||||
for (int i = start + 1; i < end; ++i) {
|
||||
boxes_[start] += boxes_[i];
|
||||
}
|
||||
int shrinkage = end - 1 - start;
|
||||
length_ -= shrinkage;
|
||||
for (int i = start + 1; i < length_; ++i)
|
||||
boxes_[i] = boxes_[i + shrinkage];
|
||||
boxes_.truncate(length_);
|
||||
}
|
||||
|
||||
// Inserts a new box before the given index.
|
||||
// Recomputes the bounding box.
|
||||
void BoxWord::InsertBox(int index, const TBOX& box) {
|
||||
if (index < length_)
|
||||
boxes_.insert(box, index);
|
||||
else
|
||||
boxes_.push_back(box);
|
||||
length_ = boxes_.size();
|
||||
ComputeBoundingBox();
|
||||
}
|
||||
|
||||
// Changes the box at the given index to the new box.
|
||||
// Recomputes the bounding box.
|
||||
void BoxWord::ChangeBox(int index, const TBOX& box) {
|
||||
boxes_[index] = box;
|
||||
ComputeBoundingBox();
|
||||
}
|
||||
|
||||
// Deletes the box with the given index, and shuffles up the rest.
|
||||
// Recomputes the bounding box.
|
||||
void BoxWord::DeleteBox(int index) {
|
||||
ASSERT_HOST(0 <= index && index < length_);
|
||||
boxes_.remove(index);
|
||||
--length_;
|
||||
ComputeBoundingBox();
|
||||
}
|
||||
|
||||
// Deletes all the boxes stored in BoxWord.
|
||||
void BoxWord::DeleteAllBoxes() {
|
||||
length_ = 0;
|
||||
boxes_.clear();
|
||||
bbox_ = TBOX();
|
||||
}
|
||||
|
||||
// Computes the bounding box of the word.
|
||||
void BoxWord::ComputeBoundingBox() {
|
||||
bbox_ = TBOX();
|
||||
for (int i = 0; i < length_; ++i)
|
||||
bbox_ += boxes_[i];
|
||||
}
|
||||
|
||||
// This and other putatively are the same, so call the (permanent) callback
|
||||
// for each blob index where the bounding boxes match.
|
||||
// The callback is deleted on completion.
|
||||
void BoxWord::ProcessMatchedBlobs(const TWERD& other,
|
||||
TessCallback1<int>* cb) const {
|
||||
for (int i = 0; i < length_ && i < other.NumBlobs(); ++i) {
|
||||
TBOX blob_box = other.blobs[i]->bounding_box();
|
||||
if (blob_box == boxes_[i])
|
||||
cb->Run(i);
|
||||
}
|
||||
delete cb;
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
|
@ -0,0 +1,101 @@
|
|||
///////////////////////////////////////////////////////////////////////
// File:        boxword.h
// Description: Class to represent the bounding boxes of the output.
// Author:      Ray Smith
// Created:     Tue May 25 14:18:14 PDT 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CSTRUCT_BOXWORD_H__
#define TESSERACT_CSTRUCT_BOXWORD_H__

#include "genericvector.h"
#include "rect.h"
#include "unichar.h"

class BLOCK;
class DENORM;
struct TWERD;
class UNICHARSET;
class WERD;
class WERD_CHOICE;
class WERD_RES;

namespace tesseract {

// Class to hold an array of bounding boxes for an output word and
// the bounding box of the whole word.
class BoxWord {
 public:
  BoxWord();
  explicit BoxWord(const BoxWord& src);
  ~BoxWord();

  BoxWord& operator=(const BoxWord& src);

  // Deep-copies the box array and cached bounding box from src.
  void CopyFrom(const BoxWord& src);

  // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
  // switch back to original image coordinates.
  static BoxWord* CopyFromNormalized(TWERD* tessword);

  // Clean up the bounding boxes from the polygonal approximation by
  // expanding slightly, then clipping to the blobs from the original_word
  // that overlap. If not null, the block provides the inverse rotation.
  void ClipToOriginalWord(const BLOCK* block, WERD* original_word);

  // Merges the boxes from start to end, not including end, and deletes
  // the boxes between start and end.
  void MergeBoxes(int start, int end);

  // Inserts a new box before the given index.
  // Recomputes the bounding box.
  void InsertBox(int index, const TBOX& box);

  // Changes the box at the given index to the new box.
  // Recomputes the bounding box.
  void ChangeBox(int index, const TBOX& box);

  // Deletes the box with the given index, and shuffles up the rest.
  // Recomputes the bounding box.
  void DeleteBox(int index);

  // Deletes all the boxes stored in BoxWord.
  void DeleteAllBoxes();

  // This and other putatively are the same, so call the (permanent) callback
  // for each blob index where the bounding boxes match.
  // The callback is deleted on completion.
  void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;

  // Accessor for the bounding box of the whole word.
  const TBOX& bounding_box() const {
    return bbox_;
  }
  // Number of per-blob boxes held.
  int length() const { return length_; }
  // Accessor for the box of a single blob. index is not range-checked here.
  const TBOX& BlobBox(int index) const {
    return boxes_[index];
  }

 private:
  // Recomputes bbox_ as the union of boxes_.
  void ComputeBoundingBox();

  TBOX bbox_;                  // Bounding box of the whole word.
  int length_;                 // Number of elements in boxes_.
  GenericVector<TBOX> boxes_;  // Per-blob bounding boxes.
};

}  // namespace tesseract.


#endif  // TESSERACT_CSTRUCT_BOXWORD_H__
|
|
@ -0,0 +1,36 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: ccstruct.cpp
|
||||
// Description: ccstruct class.
|
||||
// Author: Samuel Charron
|
||||
//
|
||||
// (C) Copyright 2006, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "ccstruct.h"
|
||||
|
||||
namespace tesseract {

// APPROXIMATIONS of the fractions of the character cell taken by
// the descenders, ascenders, and x-height.
const double CCStruct::kDescenderFraction = 0.25;
const double CCStruct::kXHeightFraction = 0.5;
const double CCStruct::kAscenderFraction = 0.25;
// Derived ratio of x-height to cap-height implied by the fractions above:
// 0.5 / (0.5 + 0.25) = 2/3.
const double CCStruct::kXHeightCapRatio = CCStruct::kXHeightFraction /
                (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);

CCStruct::CCStruct() {}

CCStruct::~CCStruct() {
}

}  // namespace tesseract
|
|
@ -0,0 +1,44 @@
|
|||
///////////////////////////////////////////////////////////////////////
// File:        ccstruct.h
// Description: ccstruct class.
// Author:      Samuel Charron
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H__
#define TESSERACT_CCSTRUCT_CCSTRUCT_H__

#include "cutil.h"

namespace tesseract {
// Holds globally useful layout constants for the ccstruct layer.
// NOTE(review): inherits CUtil, presumably for shared utility state —
// confirm against cutil.h.
class CCStruct : public CUtil {
 public:
  CCStruct();
  ~CCStruct();

  // Globally accessible constants.
  // APPROXIMATIONS of the fractions of the character cell taken by
  // the descenders, ascenders, and x-height.
  static const double kDescenderFraction;  // = 0.25;
  static const double kXHeightFraction;    // = 0.5;
  static const double kAscenderFraction;   // = 0.25;
  // Derived value giving the x-height as a fraction of cap-height.
  static const double kXHeightCapRatio;    // = XHeight/(XHeight + Ascender).
};

class Tesseract;
}  // namespace tesseract


#endif  // TESSERACT_CCSTRUCT_CCSTRUCT_H__
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,291 @@
|
|||
/**********************************************************************
 * File:        coutln.h (Formerly: coutline.h)
 * Description: Code for the C_OUTLINE class.
 * Author:      Ray Smith
 * Created:     Mon Oct 07 16:01:57 BST 1991
 *
 * (C) Copyright 1991, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#ifndef COUTLN_H
#define COUTLN_H

#include "crakedge.h"
#include "mod128.h"
#include "bits16.h"
#include "rect.h"
#include "blckerr.h"
#include "scrollview.h"

class DENORM;

// Sentinel winding number meaning "no winding number": boundaries intersect.
#define INTERSECTING MAX_INT16//no winding number

// Mask to extract a single 2-bit chain-code step from a packed byte.
#define STEP_MASK 3

enum C_OUTLINE_FLAGS
{
  COUT_INVERSE                 // White-on-black blob.
};

// Simple struct to hold the 3 values needed to compute a more precise edge
// position and direction. The offset_numerator is the difference between the
// grey threshold and the mean pixel value. pixel_diff is the difference between
// the pixels in the edge. Consider the following row of pixels: p1 p2 p3 p4 p5
// Say the image was thresholded at threshold t, making p1, p2, p3 black
// and p4, p5 white (p1, p2, p3 < t, and p4, p5 >= t), but suppose that
// max(p[i+1] - p[i]) is p3 - p2. Then the extrapolated position of the edge,
// based on the maximum gradient, is at the crack between p2 and p3 plus the
// offset (t - (p2+p3)/2)/(p3 - p2). We store the pixel difference p3-p2
// denominator in pixel_diff and the offset numerator, relative to the original
// binary edge (t - (p2+p3)/2) - (p3 -p2) in offset_numerator.
// The sign of offset_numerator and pixel_diff are manipulated to ensure
// that the pixel_diff, which will be used as a weight, is always positive.
// The direction stores the quantized feature direction for the given step
// computed from the edge gradient. (Using binary_angle_plus_pi.)
// If the pixel_diff is zero, it means that the direction of the gradient
// is in conflict with the step direction, so this step is to be ignored.
struct EdgeOffset {
  inT8 offset_numerator;
  uinT8 pixel_diff;
  uinT8 direction;
};

class DLLSYM C_OUTLINE;          // Forward declaration.
struct Pix;

ELISTIZEH (C_OUTLINE)
// An outline of a connected component, stored as a start point plus a
// packed array of 2-bit chain-code steps, with optional sub-pixel offsets.
class DLLSYM C_OUTLINE:public ELIST_LINK {
 public:
  C_OUTLINE() {                  // Empty constructor: no step storage yet.
    steps = NULL;
    offsets = NULL;
  }
  C_OUTLINE(                     // Constructor from the edge detector.
            CRACKEDGE *startpt,  // Outline to convert.
            ICOORD bot_left,     // Bounding box.
            ICOORD top_right,
            inT16 length);       // Length of loop.
  C_OUTLINE(ICOORD startpt,      // Start of loop.
            DIR128 *new_steps,   // Steps in loop.
            inT16 length);       // Length of loop.
                                 // Copy the source outline...
  C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation);  // ...and rotate it.

  // Build a fake outline, given just a bounding box and append to the list.
  static void FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines);

  ~C_OUTLINE () {                // Destructor.
    // NOTE(review): steps is released with free_mem while offsets uses
    // delete[] — presumably steps comes from the project's alloc_mem;
    // confirm before changing either allocation.
    if (steps != NULL)
      free_mem(steps);
    steps = NULL;
    delete [] offsets;
  }

  BOOL8 flag(                          // Test the given flag.
             C_OUTLINE_FLAGS mask) const {  // Flag to test.
    return flags.bit (mask);
  }
  void set_flag(                       // Set a flag value.
                C_OUTLINE_FLAGS mask,  // Flag to set.
                BOOL8 value) {         // Value to set it to.
    flags.set_bit (mask, value);
  }

  C_OUTLINE_LIST *child() {            // Get the list of nested outlines.
    return &children;
  }

  // Bounding box accessor.
  const TBOX &bounding_box() const {
    return box;
  }
  void set_step(                     // Set a step: 4 steps pack into a byte.
                inT16 stepindex,     // Index of step.
                inT8 stepdir) {      // Chain code (0-3).
    int shift = stepindex%4 * 2;
    uinT8 mask = 3 << shift;
    steps[stepindex/4] = ((stepdir << shift) & mask) |
      (steps[stepindex/4] & ~mask);
  }
  void set_step(                     // Set a step from a full direction.
                inT16 stepindex,     // Index of step.
                DIR128 stepdir) {    // Direction to quantize to 2 bits.
    inT8 chaindir = stepdir.get_dir() >> (DIRBITS - 2);
    set_step(stepindex, chaindir);
  }

  inT32 pathlength() const {         // Number of steps in the outline.
    return stepcount;
  }
  // Return step at a given index as a DIR128.
  DIR128 step_dir(int index) const {
    return DIR128((inT16)(((steps[index/4] >> (index%4 * 2)) & STEP_MASK) <<
      (DIRBITS - 2)));
  }
  // Return the step vector for the given outline position.
  ICOORD step(int index) const {     // Index of step.
    return step_coords[chain_code(index)];
  }
  // Get the start position.
  const ICOORD &start_pos() const {
    return start;
  }
  // Returns the position at the given index on the outline.
  // NOT to be used lightly, as it has to iterate the outline to find out.
  ICOORD position_at_index(int index) const {
    ICOORD pos = start;
    for (int i = 0; i < index; ++i)
      pos += step(i);
    return pos;
  }
  // Returns the sub-pixel accurate position given the integer position pos
  // at the given index on the outline. pos may be a return value of
  // position_at_index, or computed by repeatedly adding step to the
  // start_pos() in the usual way.
  FCOORD sub_pixel_pos_at_index(const ICOORD& pos, int index) const {
    const ICOORD& step_to_next(step(index));
    FCOORD f_pos(pos.x() + step_to_next.x() / 2.0f,
                 pos.y() + step_to_next.y() / 2.0f);
    if (offsets != NULL && offsets[index].pixel_diff > 0) {
      float offset = offsets[index].offset_numerator;
      offset /= offsets[index].pixel_diff;
      // The offset is applied perpendicular to the step direction.
      if (step_to_next.x() != 0)
        f_pos.set_y(f_pos.y() + offset);
      else
        f_pos.set_x(f_pos.x() + offset);
    }
    return f_pos;
  }
  // Returns the step direction for the given index or -1 if there is none.
  int direction_at_index(int index) const {
    if (offsets != NULL && offsets[index].pixel_diff > 0)
      return offsets[index].direction;
    return -1;
  }
  // Returns the edge strength for the given index.
  // If there are no recorded edge strengths, returns 1 (assuming the image
  // is binary). Returns 0 if the gradient direction conflicts with the
  // step direction, indicating that this position could be skipped.
  int edge_strength_at_index(int index) const {
    if (offsets != NULL)
      return offsets[index].pixel_diff;
    return 1;
  }
  // Return the step as a chain code (0-3) related to the standard feature
  // direction of binary_angle_plus_pi by:
  // chain_code * 64 = feature direction.
  int chain_code(int index) const {  // Index of step.
    return (steps[index / 4] >> (index % 4 * 2)) & STEP_MASK;
  }

  inT32 area() const;        // Returns area of self and 1st level children.
  inT32 perimeter() const;   // Total perimeter of self and 1st level children.
  inT32 outer_area() const;  // Returns area of self only.
  inT32 count_transitions(   // Count maxima.
                          inT32 threshold);  // Size threshold.

  BOOL8 operator< (          // Containment test.
    const C_OUTLINE & other) const;
  BOOL8 operator> (          // Containment test.
    C_OUTLINE & other) const
  {
    return other < *this;    // Implemented in terms of operator<.
  }
  inT16 winding_number(      // Get winding number...
                       ICOORD testpt) const;  // ...around this point.
                             // Get rotation direction.
  inT16 turn_direction() const;
  void reverse();            // Reverse the direction of the outline.

  void move(                 // Reposition the outline...
            const ICOORD vec);  // ...by this vector.

  // Returns true if *this and its children are legally nested.
  // The outer area of a child should have the opposite sign to the
  // parent. If not, it means we have discarded an outline in between
  // (probably due to excessive length).
  bool IsLegallyNested() const;

  // If this outline is smaller than the given min_size, delete this and
  // remove from its list, via *it, after checking that *it points to this.
  // Otherwise, if any children of this are too small, delete them.
  // On entry, *it must be an iterator pointing to this. If this gets deleted
  // then this is extracted from *it, so an iteration can continue.
  void RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it);

  // Adds sub-pixel resolution EdgeOffsets for the outline if the supplied
  // pix is 8-bit. Does nothing otherwise.
  void ComputeEdgeOffsets(int threshold, Pix* pix);
  // Adds sub-pixel resolution EdgeOffsets for the outline using only
  // a binary image source.
  void ComputeBinaryOffsets();

  // Renders the outline to the given pix, with left and top being
  // the coords of the upper-left corner of the pix.
  void render(int left, int top, Pix* pix) const;

  // Renders just the outline to the given pix (no fill), with left and top
  // being the coords of the upper-left corner of the pix.
  void render_outline(int left, int top, Pix* pix) const;

#ifndef GRAPHICS_DISABLED
  void plot(                       // Draw one outline.
            ScrollView* window,    // Window to draw in.
            ScrollView::Color colour) const;  // Colour to draw it in.
  // Draws the outline in the given colour, normalized using the given denorm,
  // making use of sub-pixel accurate information if available.
  void plot_normed(const DENORM& denorm, ScrollView::Color colour,
                   ScrollView* window) const;
#endif  // GRAPHICS_DISABLED

  C_OUTLINE& operator=(const C_OUTLINE& source);

  // Returns a new deep copy of src; the caller owns the result.
  static C_OUTLINE* deep_copy(const C_OUTLINE* src) {
    C_OUTLINE* outline = new C_OUTLINE;
    *outline = *src;
    return outline;
  }

  static ICOORD chain_step(int chaindir);

  // The maximum length of any outline. The stepcount is stored as 16 bits,
  // but it is probably not a good idea to increase this constant by much
  // and switch to 32 bits, as it plays an important role in keeping huge
  // outlines invisible, which prevents bad speed behavior.
  static const int kMaxOutlineLength = 16000;

 private:
  // Helper for ComputeBinaryOffsets. Increments pos, dir_counts, pos_totals
  // by the step, increment, and vertical step ? x : y position * increment
  // at step s Mod stepcount respectively. Used to add or subtract the
  // direction and position to/from accumulators of a small neighbourhood.
  void increment_step(int s, int increment, ICOORD* pos, int* dir_counts,
                      int* pos_totals) const;
  // Number of bytes needed to store stepcount 2-bit steps.
  int step_mem() const { return (stepcount+3) / 4; }

  TBOX box;                  // Bounding box.
  ICOORD start;              // Start coordinate.
  inT16 stepcount;           // Number of steps.
  BITS16 flags;              // Flags about the outline (C_OUTLINE_FLAGS).
  uinT8 *steps;              // Packed step array (4 steps per byte).
  EdgeOffset* offsets;       // Higher-precision edge info, or NULL.
  C_OUTLINE_LIST children;   // Nested child outlines.
  static ICOORD step_coords[4];  // Unit vectors for the 4 chain codes.
};
#endif
|
|
@ -0,0 +1,37 @@
|
|||
/**********************************************************************
 * File:        crakedge.h (Formerly: crkedge.h)
 * Description: Structures for the crack-following edge detector.
 * Author:      Ray Smith
 * Created:     Fri Mar 22 16:06:38 GMT 1991
 *
 * (C) Copyright 1991, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#ifndef CRAKEDGE_H
#define CRAKEDGE_H

#include "points.h"
#include "mod128.h"

// One point in a doubly-linked chain of crack edges built while following
// the "cracks" between pixels during edge detection.
class CRACKEDGE {
 public:
  CRACKEDGE() {}

  ICOORD pos;                  // Position of the crack.
  inT8 stepx;                  // Edge step in x.
  inT8 stepy;                  // Edge step in y.
  inT8 stepdir;                // Chaincode direction of the step.
  CRACKEDGE *prev;             // Previous point in the chain.
  CRACKEDGE *next;             // Next point in the chain.
};
#endif
|
|
@ -0,0 +1,295 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: detlinefit.cpp
|
||||
// Description: Deterministic least median squares line fitting.
|
||||
// Author: Ray Smith
|
||||
// Created: Thu Feb 28 14:45:01 PDT 2008
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "detlinefit.h"
|
||||
#include "statistc.h"
|
||||
#include "ndminx.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// The number of points to consider at each end.
const int kNumEndPoints = 3;
// The minimum number of points at which to switch to number of points
// for badly fitted lines.
// To ensure a sensible error metric, kMinPointsForErrorCount should be at
// least kMaxRealDistance / (1 - %ile) where %ile is the fractile used in
// ComputeUpperQuartileError.
const int kMinPointsForErrorCount = 16;
// The maximum real distance to use before switching to number of
// mis-fitted points, which will get square-rooted for true distance.
// Declared double: the value is a real distance. (Declaring it int, as
// before, silently truncated the 2.0 initializer.)
const double kMaxRealDistance = 2.0;
|
||||
|
||||
// Constructs an empty fitter; square_length_ is only meaningful after a
// Compute*Distances call has established the fit vector.
DetLineFit::DetLineFit() : square_length_(0.0) {
}

DetLineFit::~DetLineFit() {
}
|
||||
|
||||
// Delete all Added points, and the distances computed from them, returning
// the fitter to its freshly-constructed state.
void DetLineFit::Clear() {
  pts_.clear();
  distances_.clear();
}
|
||||
|
||||
// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
// The point is stored with a zero halfwidth, so it is never discarded for
// overlapping a neighbour (see the halfwidth overload below).
void DetLineFit::Add(const ICOORD& pt) {
  pts_.push_back(PointWidth(pt, 0));
}
|
||||
// Associates a half-width with the given point. If a point overlaps the
// previous point by more than half the width, and its distance is further
// than the previous point, then the more distant point is ignored in the
// distance calculation. Useful for ignoring i dots and other diacritics.
void DetLineFit::Add(const ICOORD& pt, int halfwidth) {
  pts_.push_back(PointWidth(pt, halfwidth));
}
|
||||
|
||||
// Fits a line to the points, ignoring the skip_first initial points and the
// skip_last final points, returning the fitted line as a pair of points,
// and the upper quartile error.
// Tries every line through one of the first kNumEndPoints and one of the
// last kNumEndPoints usable points, keeping the line with the least
// upper-quartile squared error.
double DetLineFit::Fit(int skip_first, int skip_last,
                       ICOORD* pt1, ICOORD* pt2) {
  // Do something sensible with no points.
  if (pts_.empty()) {
    pt1->set_x(0);
    pt1->set_y(0);
    *pt2 = *pt1;
    return 0.0;
  }
  // Count the points and find the first and last kNumEndPoints.
  int pt_count = pts_.size();
  ICOORD* starts[kNumEndPoints];
  // Clamp the skips so at least one point remains at each end.
  if (skip_first >= pt_count) skip_first = pt_count - 1;
  int start_count = 0;
  int end_i = MIN(skip_first + kNumEndPoints, pt_count);
  for (int i = skip_first; i < end_i; ++i) {
    starts[start_count++] = &pts_[i].pt;
  }
  ICOORD* ends[kNumEndPoints];
  if (skip_last >= pt_count) skip_last = pt_count - 1;
  int end_count = 0;
  end_i = MAX(0, pt_count - kNumEndPoints - skip_last);
  // ends[] is filled from the back of the usable range, walking backwards.
  for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
    ends[end_count++] = &pts_[i].pt;
  }
  // 1 or 2 points need special treatment.
  if (pt_count <= 2) {
    *pt1 = *starts[0];
    if (pt_count > 1)
      *pt2 = *ends[0];
    else
      *pt2 = *pt1;
    return 0.0;
  }
  // Although with between 2 and 2*kNumEndPoints-1 points, there will be
  // overlap in the starts, ends sets, this is OK and taken care of by the
  // if (*start != *end) test below, which also tests for equal input points.
  double best_uq = -1.0;  // Sentinel: no candidate line evaluated yet.
  // Iterate each pair of points and find the best fitting line.
  for (int i = 0; i < start_count; ++i) {
    ICOORD* start = starts[i];
    for (int j = 0; j < end_count; ++j) {
      ICOORD* end = ends[j];
      if (*start != *end) {
        ComputeDistances(*start, *end);
        // Compute the upper quartile error from the line.
        double dist = EvaluateLineFit();
        if (dist < best_uq || best_uq < 0.0) {
          best_uq = dist;
          *pt1 = *start;
          *pt2 = *end;
        }
      }
    }
  }
  // Finally compute the square root to return the true distance.
  return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
}
|
||||
|
||||
// Constrained fit with a supplied direction vector. Finds the best line_pt,
// that is one of the supplied points having the median cross product with
// direction, ignoring points that have a cross product outside of the range
// [min_dist, max_dist]. Returns the resulting error metric using the same
// reduced set of points.
// *Makes use of floating point arithmetic*
double DetLineFit::ConstrainedFit(const FCOORD& direction,
                                  double min_dist, double max_dist,
                                  bool debug, ICOORD* line_pt) {
  ComputeConstrainedDistances(direction, min_dist, max_dist);
  // Do something sensible with no points or computed distances.
  if (pts_.empty() || distances_.empty()) {
    line_pt->set_x(0);
    line_pt->set_y(0);
    return 0.0;
  }
  // The point with the median cross-product distance is the fitted point:
  // half the surviving points lie on either side of the line through it.
  int median_index = distances_.choose_nth_item(distances_.size() / 2);
  *line_pt = distances_[median_index].data;
  if (debug) {
    tprintf("Constrained fit to dir %g, %g = %d, %d :%d distances:\n",
            direction.x(), direction.y(),
            line_pt->x(), line_pt->y(), distances_.size());
    for (int i = 0; i < distances_.size(); ++i) {
      tprintf("%d: %d, %d -> %g\n", i, distances_[i].data.x(),
              distances_[i].data.y(), distances_[i].key);
    }
    tprintf("Result = %d\n", median_index);
  }
  // Center distances on the fitted point.
  double dist_origin = direction * *line_pt;
  for (int i = 0; i < distances_.size(); ++i) {
    distances_[i].key -= dist_origin;
  }
  // EvaluateLineFit returns a squared metric; root it for a true distance.
  return sqrt(EvaluateLineFit());
}
|
||||
|
||||
// Returns true if there were enough points at the last call to Fit or
// ConstrainedFit for the fitted points to be used on a badly fitted line.
// (distances_ holds the points that actually survived into the last fit.)
bool DetLineFit::SufficientPointsForIndependentFit() const {
  return distances_.size() >= kMinPointsForErrorCount;
}
|
||||
|
||||
// Backwards compatible fit returning a gradient and constant.
|
||||
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
|
||||
// function in preference to the LMS class.
|
||||
double DetLineFit::Fit(float* m, float* c) {
|
||||
ICOORD start, end;
|
||||
double error = Fit(&start, &end);
|
||||
if (end.x() != start.x()) {
|
||||
*m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x());
|
||||
*c = start.y() - *m * start.x();
|
||||
} else {
|
||||
*m = 0.0f;
|
||||
*c = 0.0f;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
// Backwards compatible constrained fit with a supplied gradient.
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
// to avoid potential difficulties with infinite gradients.
double DetLineFit::ConstrainedFit(double m, float* c) {
  // Do something sensible with no points.
  if (pts_.empty()) {
    *c = 0.0f;
    return 0.0;
  }
  // Convert gradient m into a unit direction vector (cos, sin) of the line.
  double cos = 1.0 / sqrt(1.0 + m * m);
  FCOORD direction(cos, m * cos);
  ICOORD line_pt;
  // No distance limits: accept every point's cross product.
  double error = ConstrainedFit(direction, -MAX_FLOAT32, MAX_FLOAT32, false,
                                &line_pt);
  // Recover the intercept from the fitted point.
  *c = line_pt.y() - line_pt.x() * m;
  return error;
}
|
||||
|
||||
// Computes and returns the squared evaluation metric for a line fit.
// For well-populated, badly fitting lines the metric switches from squared
// distance to a count of mis-fitted points (see comment below).
double DetLineFit::EvaluateLineFit() {
  // Compute the upper quartile error from the line.
  double dist = ComputeUpperQuartileError();
  if (distances_.size() >= kMinPointsForErrorCount &&
      dist > kMaxRealDistance * kMaxRealDistance) {
    // Use the number of mis-fitted points as the error metric, as this
    // gives a better measure of fit for badly fitted lines where more
    // than a quarter are badly fitted.
    // distances_ keys are cross products (scaled by line length), so the
    // real-distance threshold is scaled by sqrt(square_length_) to match.
    double threshold = kMaxRealDistance * sqrt(square_length_);
    dist = NumberOfMisfittedPoints(threshold);
  }
  return dist;
}
|
||||
|
||||
// Computes the absolute error distances of the points from the line,
// and returns the squared upper-quartile error distance.
// Mutates distances_: keys are replaced by their absolute values and the
// vector is partially reordered by choose_nth_item.
double DetLineFit::ComputeUpperQuartileError() {
  int num_errors = distances_.size();
  if (num_errors == 0) return 0.0;
  // Get the absolute values of the errors.
  for (int i = 0; i < num_errors; ++i) {
    if (distances_[i].key < 0) distances_[i].key = -distances_[i].key;
  }
  // Now get the upper quartile distance.
  int index = distances_.choose_nth_item(3 * num_errors / 4);
  double dist = distances_[index].key;
  // The true distance is the square root of the dist squared / square_length.
  // Don't bother with the square root. Just return the square distance.
  return square_length_ > 0.0 ? dist * dist / square_length_ : 0.0;
}
|
||||
|
||||
// Returns the number of sample points that have an error more than threshold.
|
||||
int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
|
||||
int num_misfits = 0;
|
||||
int num_dists = distances_.size();
|
||||
// Get the absolute values of the errors.
|
||||
for (int i = 0; i < num_dists; ++i) {
|
||||
if (distances_[i].key > threshold)
|
||||
++num_misfits;
|
||||
}
|
||||
return num_misfits;
|
||||
}
|
||||
|
||||
// Computes all the cross product distances of the points from the line,
// storing the actual (signed) cross products in distances.
// Ignores distances of points that are further away than the previous point,
// and overlaps the previous point by at least half.
void DetLineFit::ComputeDistances(const ICOORD& start, const ICOORD& end) {
  distances_.truncate(0);
  ICOORD line_vector = end;
  line_vector -= start;
  square_length_ = line_vector.sqlength();
  int line_length = IntCastRounded(sqrt(square_length_));
  // Compute the distance of each point from the line.
  int prev_abs_dist = 0;
  int prev_dot = 0;
  for (int i = 0; i < pts_.size(); ++i) {
    ICOORD pt_vector = pts_[i].pt;
    pt_vector -= start;
    // NOTE(review): ICOORD operator% appears to be the dot product here,
    // measuring the point's position along the line (scaled by line length)
    // for the overlap test below — confirm against points.h.
    int dot = line_vector % pt_vector;
    // Compute |line_vector||pt_vector|sin(angle between)
    // (the cross product, proportional to perpendicular distance).
    int dist = line_vector * pt_vector;
    int abs_dist = dist < 0 ? -dist : dist;
    if (abs_dist > prev_abs_dist && i > 0) {
      // Ignore this point if it overlaps the previous one.
      // separation is the along-line gap, scaled by line_length, so it is
      // compared against halfwidths scaled the same way.
      int separation = abs(dot - prev_dot);
      if (separation < line_length * pts_[i].halfwidth ||
          separation < line_length * pts_[i - 1].halfwidth)
        continue;
    }
    distances_.push_back(DistPointPair(dist, pts_[i].pt));
    prev_abs_dist = abs_dist;
    prev_dot = dot;
  }
}
|
||||
|
||||
// Computes all the cross product distances of the points perpendicular to
// the given direction, ignoring distances outside of the given distance
// range, storing the actual (signed) cross products in distances_.
void DetLineFit::ComputeConstrainedDistances(const FCOORD& direction,
                                             double min_dist, double max_dist) {
  distances_.truncate(0);
  square_length_ = direction.sqlength();
  // Compute the distance of each point from the line.
  for (int i = 0; i < pts_.size(); ++i) {
    FCOORD pt_vector = pts_[i].pt;
    // Compute |line_vector||pt_vector|sin(angle between)
    // (the cross product with the direction vector).
    double dist = direction * pt_vector;
    // Keep only points whose distance falls inside the allowed band.
    if (min_dist <= dist && dist <= max_dist)
      distances_.push_back(DistPointPair(dist, pts_[i].pt));
  }
}
|
||||
|
||||
} // namespace tesseract.
|
|
@ -0,0 +1,164 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: detlinefit.h
|
||||
// Description: Deterministic least upper-quartile squares line fitting.
|
||||
// Author: Ray Smith
|
||||
// Created: Thu Feb 28 14:35:01 PDT 2008
|
||||
//
|
||||
// (C) Copyright 2008, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
|
||||
#define TESSERACT_CCSTRUCT_DETLINEFIT_H_
|
||||
|
||||
#include "genericvector.h"
|
||||
#include "kdpair.h"
|
||||
#include "points.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// This class fits a line to a set of ICOORD points.
|
||||
// There is no restriction on the direction of the line, as it
|
||||
// uses a vector method, ie no concern over infinite gradients.
|
||||
// The fitted line has the least upper quartile of squares of perpendicular
|
||||
// distances of all source points from the line, subject to the constraint
|
||||
// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
|
||||
// i.e. the 9 combinations of one of the first 3 and last 3 points.
|
||||
// A fundamental assumption of this algorithm is that one of the first 3 and
|
||||
// one of the last 3 points are near the best line fit.
|
||||
// The points must be Added in line order for the algorithm to work properly.
|
||||
// No floating point calculations are needed* to make an accurate fit,
|
||||
// and no random numbers are needed** so the algorithm is deterministic,
|
||||
// architecture-stable, and compiler-stable as well as stable to minor
|
||||
// changes in the input.
|
||||
// *A single floating point division is used to compute each line's distance.
|
||||
// This is unlikely to result in choice of a different line, but if it does,
|
||||
// it would be easy to replace with a 64 bit integer calculation.
|
||||
// **Random numbers are used in the nth_item function, but the worst
|
||||
// non-determinism that can result is picking a different result among equals,
|
||||
// and that wouldn't make any difference to the end-result distance, so the
|
||||
// randomness does not affect the determinism of the algorithm. The random
|
||||
// numbers are only there to guarantee average linear time.
|
||||
// Fitting time is linear, but with a high constant, as it tries 9 different
|
||||
// lines and computes the distance of all points each time.
|
||||
// This class is aimed at replacing the LLSQ (linear least squares) and
|
||||
// LMS (least median of squares) classes that are currently used for most
|
||||
// of the line fitting in Tesseract.
|
||||
class DetLineFit {
 public:
  DetLineFit();
  ~DetLineFit();

  // Delete all Added points.
  void Clear();

  // Adds a new point. Takes a copy - the pt doesn't need to stay in scope.
  // Add must be called on points in sequence along the line.
  void Add(const ICOORD& pt);
  // As Add(pt), but associates a half-width with the given point. If a point
  // overlaps the previous point by more than half the width, and its
  // distance is further than the previous point, then the more distant point
  // is ignored in the distance calculation. Useful for ignoring i dots and
  // other diacritics.
  void Add(const ICOORD& pt, int halfwidth);

  // Fits a line to the points, returning the fitted line as a pair of
  // points, and the upper quartile error.
  double Fit(ICOORD* pt1, ICOORD* pt2) {
    return Fit(0, 0, pt1, pt2);  // Use all points; skip none at either end.
  }
  // Fits a line to the points, ignoring the skip_first initial points and the
  // skip_last final points, returning the fitted line as a pair of points,
  // and the upper quartile error.
  double Fit(int skip_first, int skip_last, ICOORD* pt1, ICOORD* pt2);

  // Constrained fit with a supplied direction vector. Finds the best line_pt,
  // that is one of the supplied points having the median cross product with
  // direction, ignoring points that have a cross product outside of the range
  // [min_dist, max_dist]. Returns the resulting error metric using the same
  // reduced set of points.
  // *Makes use of floating point arithmetic*
  double ConstrainedFit(const FCOORD& direction,
                        double min_dist, double max_dist,
                        bool debug, ICOORD* line_pt);

  // Returns true if there were enough points at the last call to Fit or
  // ConstrainedFit for the fitted points to be used on a badly fitted line.
  bool SufficientPointsForIndependentFit() const;

  // Backwards compatible fit returning a gradient and constant.
  // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
  // function in preference to the LMS class.
  double Fit(float* m, float* c);

  // Backwards compatible constrained fit with a supplied gradient.
  // Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
  // to avoid potential difficulties with infinite gradients.
  double ConstrainedFit(double m, float* c);

 private:
  // Simple struct to hold an ICOORD point and a halfwidth representing half
  // the "width" (supposedly approximately parallel to the direction of the
  // line) of each point, such that distant points can be discarded when they
  // overlap nearer points. (Think i dot and other diacritics or noise.)
  struct PointWidth {
    PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {}
    PointWidth(const ICOORD& pt0, int halfwidth0)
      : pt(pt0), halfwidth(halfwidth0) {}

    ICOORD pt;      // The source point itself.
    int halfwidth;  // Half the point's "width"; 0 means never discarded.
  };
  // Type holds the distance of each point from the fitted line and the point
  // itself. Use of double allows integer distances from ICOORDs to be stored
  // exactly, and also the floating point results from ConstrainedFit.
  typedef KDPairInc<double, ICOORD> DistPointPair;

  // Computes and returns the squared evaluation metric for a line fit.
  double EvaluateLineFit();

  // Computes the absolute values of the precomputed distances_,
  // and returns the squared upper-quartile error distance.
  double ComputeUpperQuartileError();

  // Returns the number of sample points that have an error more than threshold.
  int NumberOfMisfittedPoints(double threshold) const;

  // Computes all the cross product distances of the points from the line,
  // storing the actual (signed) cross products in distances_.
  // Ignores distances of points that are further away than the previous point,
  // and overlaps the previous point by at least half.
  void ComputeDistances(const ICOORD& start, const ICOORD& end);

  // Computes all the cross product distances of the points perpendicular to
  // the given direction, ignoring distances outside of the given distance
  // range, storing the actual (signed) cross products in distances_.
  void ComputeConstrainedDistances(const FCOORD& direction,
                                   double min_dist, double max_dist);

  // Stores all the source points in the order they were given and their
  // halfwidths, if any.
  GenericVector<PointWidth> pts_;
  // Stores the computed perpendicular distances of (some of) the pts_ from a
  // given vector (assuming it goes through the origin, making it a line).
  // Since the distances may be a subset of the input points, and get
  // re-ordered by the nth_item function, the original point is stored
  // along side the distance.
  GenericVector<DistPointPair> distances_;  // Distances of points.
  // The squared length of the vector used to compute distances_.
  double square_length_;
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_
|
||||
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
/**********************************************************************
|
||||
* File: dppoint.cpp
|
||||
* Description: Simple generic dynamic programming class.
|
||||
* Author: Ray Smith
|
||||
* Created: Wed Mar 25 19:08:01 PDT 2009
|
||||
*
|
||||
* (C) Copyright 2009, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "dppoint.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Solve the dynamic programming problem for the given array of points, with
// the given size and cost function.
// Steps backwards are limited to being between min_step and max_step
// inclusive.
// The return value is the tail of the best path.
DPPoint* DPPoint::Solve(int min_step, int max_step, bool debug,
                        CostFunc cost_func, int size, DPPoint* points) {
  if (size <= 0 || max_step < min_step || min_step >= size)
    return NULL;  // Degenerate, but not necessarily an error.
  ASSERT_HOST(min_step > 0);  // Infinite loop possible if this is not true.
  if (debug)
    tprintf("min = %d, max=%d\n",
            min_step, max_step);
  // Evaluate the total cost at each point.
  for (int i = 0; i < size; ++i) {
    for (int offset = min_step; offset <= max_step; ++offset) {
      // prev is NULL when the step would run off the front of the array;
      // the cost function treats that as the start of a new path.
      DPPoint* prev = offset <= i ? points + i - offset : NULL;
      // cost_func records the best predecessor into points[i] itself (see
      // CostFunc's contract in dppoint.h); new_cost is used only for the
      // early-termination test below.
      inT64 new_cost = (points[i].*cost_func)(prev);
      if (points[i].best_prev_ != NULL && offset > min_step * 2 &&
          new_cost > points[i].total_cost_)
        break;  // Find only the first minimum if going over twice the min.
    }
    // local_cost_ is excluded while searching and added in afterwards.
    points[i].total_cost_ += points[i].local_cost_;
    if (debug) {
      tprintf("At point %d, local cost=%d, total_cost=%d, steps=%d\n",
              i, points[i].local_cost_, points[i].total_cost_,
              points[i].total_steps_);
    }
  }
  // Now find the end of the best path and return it.
  // Any of the last min_step points could legally terminate the path.
  int best_cost = points[size - 1].total_cost_;
  int best_end = size - 1;
  for (int end = best_end - 1; end >= size - min_step; --end) {
    int cost = points[end].total_cost_;
    if (cost < best_cost) {
      best_cost = cost;
      best_end = end;
    }
  }
  return points + best_end;
}
|
||||
|
||||
// A CostFunc that takes the variance of step into account in the cost.
inT64 DPPoint::CostWithVariance(const DPPoint* prev) {
  // No (or self) predecessor: this point starts a new path at zero cost.
  if (prev == NULL || prev == this) {
    UpdateIfBetter(0, 1, NULL, 0, 0, 0);
    return 0;
  }

  // Step size is the array-index distance back to prev.
  int delta = this - prev;
  // Running sums needed for variance: count, sum of steps, sum of squares.
  inT32 n = prev->n_ + 1;
  inT32 sig_x = prev->sig_x_ + delta;
  inT64 sig_xsq = prev->sig_xsq_ + delta * delta;
  // Integer form of variance: (sum(x^2) - sum(x)^2/n) / n.
  inT64 cost = (sig_xsq - sig_x * sig_x / n) / n;
  cost += prev->total_cost_;
  UpdateIfBetter(cost, prev->total_steps_ + 1, prev, n, sig_x, sig_xsq);
  return cost;
}
|
||||
|
||||
// Update the other members if the cost is lower.
|
||||
void DPPoint::UpdateIfBetter(inT64 cost, inT32 steps, const DPPoint* prev,
|
||||
inT32 n, inT32 sig_x, inT64 sig_xsq) {
|
||||
if (cost < total_cost_) {
|
||||
total_cost_ = cost;
|
||||
total_steps_ = steps;
|
||||
best_prev_ = prev;
|
||||
n_ = n;
|
||||
sig_x_ = sig_x;
|
||||
sig_xsq_ = sig_xsq;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
/**********************************************************************
|
||||
* File: dppoint.h
|
||||
* Description: Simple generic dynamic programming class.
|
||||
* Author: Ray Smith
|
||||
* Created: Wed Mar 25 18:57:01 PDT 2009
|
||||
*
|
||||
* (C) Copyright 2009, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_DPPOINT_H__
|
||||
#define TESSERACT_CCSTRUCT_DPPOINT_H__
|
||||
|
||||
#include "host.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// A simple class to provide a dynamic programming solution to a class of
|
||||
// 1st-order problems in which the cost is dependent only on the current
|
||||
// step and the best cost to that step, with a possible special case
|
||||
// of using the variance of the steps, and only the top choice is required.
|
||||
// Useful for problems such as finding the optimal cut points in a fixed-pitch
|
||||
// (vertical or horizontal) situation.
|
||||
// Skeletal Example:
|
||||
// DPPoint* array = new DPPoint[width];
|
||||
// for (int i = 0; i < width; i++) {
|
||||
// array[i].AddLocalCost(cost_at_i)
|
||||
// }
|
||||
// DPPoint* best_end = DPPoint::Solve(..., array);
|
||||
// while (best_end != NULL) {
|
||||
// int cut_index = best_end - array;
|
||||
// best_end = best_end->best_prev();
|
||||
// }
|
||||
// delete [] array;
|
||||
class DPPoint {
 public:
  // The cost function evaluates the total cost at this (excluding this's
  // local_cost) and if it beats this's total_cost, then
  // replace the appropriate values in this.
  typedef inT64 (DPPoint::*CostFunc)(const DPPoint* prev);

  // Starts with an effectively-infinite total cost so the first evaluated
  // path always wins.
  DPPoint()
    : local_cost_(0), total_cost_(MAX_INT32), total_steps_(1), best_prev_(NULL),
      n_(0), sig_x_(0), sig_xsq_(0) {
  }

  // Solve the dynamic programming problem for the given array of points, with
  // the given size and cost function.
  // Steps backwards are limited to being between min_step and max_step
  // inclusive.
  // The return value is the tail of the best path.
  static DPPoint* Solve(int min_step, int max_step, bool debug,
                        CostFunc cost_func, int size, DPPoint* points);

  // A CostFunc that takes the variance of step into account in the cost.
  inT64 CostWithVariance(const DPPoint* prev);

  // Accessors.
  int total_cost() const {
    return total_cost_;
  }
  int Pathlength() const {
    return total_steps_;
  }
  const DPPoint* best_prev() const {
    return best_prev_;
  }
  // Accumulates an externally computed cost for this position.
  void AddLocalCost(int new_cost) {
    local_cost_ += new_cost;
  }

 private:
  // Code common to different cost functions.

  // Update the other members if the cost is lower.
  void UpdateIfBetter(inT64 cost, inT32 steps, const DPPoint* prev,
                      inT32 n, inT32 sig_x, inT64 sig_xsq);

  inT32 local_cost_;   // Cost of this point on its own.
  inT32 total_cost_;   // Sum of all costs in best path to here.
                       // During cost calculations local_cost is excluded.
  inT32 total_steps_;  // Number of steps in best path to here.
  const DPPoint* best_prev_;  // Pointer to prev point in best path from here.
  // Information for computing the variance part of the cost.
  inT32 n_;            // Number of steps in best path to here for variance.
  inT32 sig_x_;        // Sum of step sizes for computing variance.
  inT64 sig_xsq_;      // Sum of squares of steps for computing variance.
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_DPPOINT_H__
|
||||
|
|
@ -0,0 +1,262 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: fontinfo.cpp
|
||||
// Description: Font information classes abstracted from intproto.h/cpp.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
// Created: Wed May 18 10:39:01 PDT 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "fontinfo.h"
|
||||
#include "bitvector.h"
|
||||
#include "unicity_table.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool FontInfo::Serialize(FILE* fp) const {
|
||||
if (!write_info(fp, *this)) return false;
|
||||
if (!write_spacing_info(fp, *this)) return false;
|
||||
return true;
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool FontInfo::DeSerialize(bool swap, FILE* fp) {
|
||||
if (!read_info(fp, this, swap)) return false;
|
||||
if (!read_spacing_info(fp, this, swap)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Installs the standard compare/clear callbacks so the table can locate
// matching FontInfo entries and free their owned pointers when cleared.
FontInfoTable::FontInfoTable() {
  set_compare_callback(NewPermanentTessCallback(CompareFontInfo));
  set_clear_callback(NewPermanentTessCallback(FontInfoDeleteCallback));
}

// Base-class destruction invokes the clear callback installed above.
FontInfoTable::~FontInfoTable() {
}
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
// Delegates to the element-wise class serializer.
bool FontInfoTable::Serialize(FILE* fp) const {
  return this->SerializeClasses(fp);
}
|
||||
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
// Existing contents are discarded (via the clear callback) before reading.
bool FontInfoTable::DeSerialize(bool swap, FILE* fp) {
  truncate(0);
  return this->DeSerializeClasses(swap, fp);
}
|
||||
|
||||
// Returns true if the given set of fonts includes one with the same
|
||||
// properties as font_id.
|
||||
bool FontInfoTable::SetContainsFontProperties(
|
||||
int font_id, const GenericVector<ScoredFont>& font_set) const {
|
||||
uinT32 properties = get(font_id).properties;
|
||||
for (int f = 0; f < font_set.size(); ++f) {
|
||||
if (get(font_set[f].fontinfo_id).properties == properties)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true if the given set of fonts includes multiple properties.
|
||||
bool FontInfoTable::SetContainsMultipleFontProperties(
|
||||
const GenericVector<ScoredFont>& font_set) const {
|
||||
if (font_set.empty()) return false;
|
||||
int first_font = font_set[0].fontinfo_id;
|
||||
uinT32 properties = get(first_font).properties;
|
||||
for (int f = 1; f < font_set.size(); ++f) {
|
||||
if (get(font_set[f].fontinfo_id).properties != properties)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Moves any non-empty FontSpacingInfo entries from other to this.
|
||||
void FontInfoTable::MoveSpacingInfoFrom(FontInfoTable* other) {
|
||||
set_compare_callback(NewPermanentTessCallback(CompareFontInfo));
|
||||
set_clear_callback(NewPermanentTessCallback(FontInfoDeleteCallback));
|
||||
for (int i = 0; i < other->size(); ++i) {
|
||||
GenericVector<FontSpacingInfo*>* spacing_vec = other->get(i).spacing_vec;
|
||||
if (spacing_vec != NULL) {
|
||||
int target_index = get_index(other->get(i));
|
||||
if (target_index < 0) {
|
||||
// Bit copy the FontInfo and steal all the pointers.
|
||||
push_back(other->get(i));
|
||||
other->get(i).name = NULL;
|
||||
} else {
|
||||
delete [] get(target_index).spacing_vec;
|
||||
get(target_index).spacing_vec = other->get(i).spacing_vec;
|
||||
}
|
||||
other->get(i).spacing_vec = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Moves the contents of this table to the target unicity table,
// transferring ownership of all heap data. This table is left with
// bit-copied entries whose pointers are nulled out.
void FontInfoTable::MoveTo(UnicityTable<FontInfo>* target) {
  target->clear();
  target->set_compare_callback(NewPermanentTessCallback(CompareFontInfo));
  target->set_clear_callback(NewPermanentTessCallback(FontInfoDeleteCallback));
  for (int i = 0; i < size(); ++i) {
    // Bit copy the FontInfo and steal all the pointers.
    target->push_back(get(i));
    // Null our copies so this table's clear callback cannot double-free
    // the name/spacing data that target now owns.
    get(i).name = NULL;
    get(i).spacing_vec = NULL;
  }
}
|
||||
|
||||
|
||||
// Compare FontInfo structures.
// The font properties are required to be the same for two fonts with the same
// name, so there is no need to test them.
// Consequently, querying the table with only its font name as information is
// enough to retrieve its properties.
bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2) {
  return strcmp(fi1.name, fi2.name) == 0;
}
|
||||
// Compare FontSet structures.
|
||||
bool CompareFontSet(const FontSet& fs1, const FontSet& fs2) {
|
||||
if (fs1.size != fs2.size)
|
||||
return false;
|
||||
for (int i = 0; i < fs1.size; ++i) {
|
||||
if (fs1.configs[i] != fs2.configs[i])
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Deletion callbacks for GenericVector: free the heap data owned by a
// FontInfo / FontSet element when a table is cleared.
void FontInfoDeleteCallback(FontInfo f) {
  if (f.spacing_vec != NULL) {
    // Delete the owned FontSpacingInfo objects, then the vector itself.
    f.spacing_vec->delete_data_pointers();
    delete f.spacing_vec;
  }
  delete[] f.name;
}
void FontSetDeleteCallback(FontSet fs) {
  delete[] fs.configs;
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
|
||||
bool read_info(FILE* f, FontInfo* fi, bool swap) {
|
||||
inT32 size;
|
||||
if (fread(&size, sizeof(size), 1, f) != 1) return false;
|
||||
if (swap)
|
||||
Reverse32(&size);
|
||||
char* font_name = new char[size + 1];
|
||||
fi->name = font_name;
|
||||
if (static_cast<int>(fread(font_name, sizeof(*font_name), size, f)) != size)
|
||||
return false;
|
||||
font_name[size] = '\0';
|
||||
if (fread(&fi->properties, sizeof(fi->properties), 1, f) != 1) return false;
|
||||
if (swap)
|
||||
Reverse32(&fi->properties);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool write_info(FILE* f, const FontInfo& fi) {
|
||||
inT32 size = strlen(fi.name);
|
||||
if (fwrite(&size, sizeof(size), 1, f) != 1) return false;
|
||||
if (static_cast<int>(fwrite(fi.name, sizeof(*fi.name), size, f)) != size)
|
||||
return false;
|
||||
if (fwrite(&fi.properties, sizeof(fi.properties), 1, f) != 1) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads the per-unichar FontSpacingInfo vector for fi from f.
// Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool read_spacing_info(FILE *f, FontInfo* fi, bool swap) {
  inT32 vec_size, kern_size;
  if (fread(&vec_size, sizeof(vec_size), 1, f) != 1) return false;
  if (swap) Reverse32(&vec_size);
  ASSERT_HOST(vec_size >= 0);
  // A zero length means no spacing info was serialized for this font.
  if (vec_size == 0) return true;
  fi->init_spacing(vec_size);
  for (int i = 0; i < vec_size; ++i) {
    FontSpacingInfo *fs = new FontSpacingInfo();
    if (fread(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, f) != 1 ||
        fread(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, f) != 1 ||
        fread(&kern_size, sizeof(kern_size), 1, f) != 1) {
      delete fs;
      return false;
    }
    if (swap) {
      ReverseN(&(fs->x_gap_before), sizeof(fs->x_gap_before));
      ReverseN(&(fs->x_gap_after), sizeof(fs->x_gap_after));
      Reverse32(&kern_size);
    }
    if (kern_size < 0) {  // indication of a NULL entry in fi->spacing_vec
      delete fs;
      continue;
    }
    // kern_size > 0 means kerning vectors follow; 0 means none were written
    // (see write_spacing_info).
    if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(swap, f) ||
                          !fs->kerned_x_gaps.DeSerialize(swap, f))) {
      delete fs;
      return false;
    }
    // fi takes ownership of fs.
    fi->add_spacing(i, fs);
  }
  return true;
}
|
||||
|
||||
bool write_spacing_info(FILE* f, const FontInfo& fi) {
|
||||
inT32 vec_size = (fi.spacing_vec == NULL) ? 0 : fi.spacing_vec->size();
|
||||
if (fwrite(&vec_size, sizeof(vec_size), 1, f) != 1) return false;
|
||||
inT16 x_gap_invalid = -1;
|
||||
for (int i = 0; i < vec_size; ++i) {
|
||||
FontSpacingInfo *fs = fi.spacing_vec->get(i);
|
||||
inT32 kern_size = (fs == NULL) ? -1 : fs->kerned_x_gaps.size();
|
||||
if (fs == NULL) {
|
||||
// Valid to have the identical fwrites. Writing invalid x-gaps.
|
||||
if (fwrite(&(x_gap_invalid), sizeof(x_gap_invalid), 1, f) != 1 ||
|
||||
fwrite(&(x_gap_invalid), sizeof(x_gap_invalid), 1, f) != 1 ||
|
||||
fwrite(&kern_size, sizeof(kern_size), 1, f) != 1) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (fwrite(&(fs->x_gap_before), sizeof(fs->x_gap_before), 1, f) != 1 ||
|
||||
fwrite(&(fs->x_gap_after), sizeof(fs->x_gap_after), 1, f) != 1 ||
|
||||
fwrite(&kern_size, sizeof(kern_size), 1, f) != 1) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (kern_size > 0 && (!fs->kerned_unichar_ids.Serialize(f) ||
|
||||
!fs->kerned_x_gaps.Serialize(f))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool read_set(FILE* f, FontSet* fs, bool swap) {
|
||||
if (fread(&fs->size, sizeof(fs->size), 1, f) != 1) return false;
|
||||
if (swap)
|
||||
Reverse32(&fs->size);
|
||||
fs->configs = new int32_t[fs->size];
|
||||
for (int i = 0; i < fs->size; ++i) {
|
||||
if (fread(&fs->configs[i], sizeof(fs->configs[i]), 1, f) != 1) return false;
|
||||
if (swap)
|
||||
Reverse32(&fs->configs[i]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool write_set(FILE* f, const FontSet& fs) {
|
||||
if (fwrite(&fs.size, sizeof(fs.size), 1, f) != 1) return false;
|
||||
for (int i = 0; i < fs.size; ++i) {
|
||||
if (fwrite(&fs.configs[i], sizeof(fs.configs[i]), 1, f) != 1) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
||||
|
|
@ -0,0 +1,191 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: fontinfo.h
|
||||
// Description: Font information classes abstracted from intproto.h/cpp.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
// Created: Tue May 17 17:08:01 PDT 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_FONTINFO_H_
|
||||
#define TESSERACT_CCSTRUCT_FONTINFO_H_
|
||||
|
||||
#include "genericvector.h"
|
||||
#include "host.h"
|
||||
#include "unichar.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
template <typename T> class UnicityTable;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BitVector;
|
||||
|
||||
// Simple struct to hold a font and a score. The scores come from the
// low-level integer matcher, so they are in the uinT16 range. Fonts are an
// index to fontinfo_table.
// These get copied around a lot, so best to keep them small.
struct ScoredFont {
  ScoredFont() : fontinfo_id(-1), score(0) {}
  ScoredFont(int font_id, uinT16 classifier_score)
      : fontinfo_id(font_id), score(classifier_score) {}

  // Index into fontinfo table, but inside the classifier, may be a shapetable
  // index. -1 means "no font".
  inT32 fontinfo_id;
  // Raw score from the low-level classifier.
  uinT16 score;
};
|
||||
|
||||
// Struct for information about spacing between characters in a particular
// font.
struct FontSpacingInfo {
  inT16 x_gap_before;  // Expected x gap before this character.
  inT16 x_gap_after;   // Expected x gap after this character.
  // Characters this one has a special kerned gap against, and the matching
  // gaps (parallel vectors: kerned_x_gaps[i] goes with kerned_unichar_ids[i]).
  GenericVector<UNICHAR_ID> kerned_unichar_ids;
  GenericVector<inT16> kerned_x_gaps;
};
|
||||
|
||||
/*
 * font_properties contains properties about boldness, italicness, fixed pitch,
 * serif, fraktur
 */
struct FontInfo {
  FontInfo() : name(NULL), properties(0), universal_id(0), spacing_vec(NULL) {}
  // NOTE: the destructor deliberately does NOT free name/spacing_vec;
  // ownership is managed externally via FontInfoDeleteCallback.
  ~FontInfo() {}

  // Writes to the given file. Returns false in case of error.
  bool Serialize(FILE* fp) const;
  // Reads from the given file. Returns false in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
  bool DeSerialize(bool swap, FILE* fp);

  // Reserves unicharset_size spots in spacing_vec.
  void init_spacing(int unicharset_size) {
    spacing_vec = new GenericVector<FontSpacingInfo *>();
    spacing_vec->init_to_size(unicharset_size, NULL);
  }
  // Adds the given pointer to FontSpacingInfo to spacing_vec member
  // (FontInfo class takes ownership of the pointer).
  // Note: init_spacing should be called before calling this function.
  void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) {
    ASSERT_HOST(spacing_vec != NULL && spacing_vec->size() > uch_id);
    (*spacing_vec)[uch_id] = spacing_info;
  }

  // Returns the pointer to FontSpacingInfo for the given UNICHAR_ID,
  // or NULL if none is stored.
  const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const {
    return (spacing_vec == NULL || spacing_vec->size() <= uch_id) ?
        NULL : (*spacing_vec)[uch_id];
  }

  // Fills spacing with the value of the x gap expected between the two given
  // UNICHAR_IDs. Returns true on success.
  bool get_spacing(UNICHAR_ID prev_uch_id,
                   UNICHAR_ID uch_id,
                   int *spacing) const {
    const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id);
    const FontSpacingInfo *fsi = this->get_spacing(uch_id);
    if (prev_fsi == NULL || fsi == NULL) return false;
    // A specific kerned gap for this pair, if recorded, wins over the
    // generic after+before gap sum.
    int i = 0;
    for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) {
      if (prev_fsi->kerned_unichar_ids[i] == uch_id) break;
    }
    if (i < prev_fsi->kerned_unichar_ids.size()) {
      *spacing = prev_fsi->kerned_x_gaps[i];
    } else {
      *spacing = prev_fsi->x_gap_after + fsi->x_gap_before;
    }
    return true;
  }

  // Property bit accessors; see the bit layout implied below.
  bool is_italic() const { return properties & 1; }
  bool is_bold() const { return (properties & 2) != 0; }
  bool is_fixed_pitch() const { return (properties & 4) != 0; }
  bool is_serif() const { return (properties & 8) != 0; }
  bool is_fraktur() const { return (properties & 16) != 0; }

  char* name;          // Font name (heap-allocated, owned externally).
  uinT32 properties;   // Bit field: italic|bold|fixed_pitch|serif|fraktur.
  // The universal_id is a field reserved for the initialization process
  // to assign a unique id number to all fonts loaded for the current
  // combination of languages. This id will then be returned by
  // ResultIterator::WordFontAttributes.
  inT32 universal_id;
  // Horizontal spacing between characters (indexed by UNICHAR_ID).
  GenericVector<FontSpacingInfo *> *spacing_vec;
};
|
||||
|
||||
// Every class (character) owns a FontSet that represents all the fonts that can
// render this character.
// Since almost all the characters from the same script share the same set of
// fonts, the sets are shared over multiple classes (see
// Classify::fontset_table_). Thus, a class only store an id to a set.
// Because some fonts cannot render just one character of a set, there are a
// lot of FontSet that differ only by one font. Rather than storing directly
// the FontInfo in the FontSet structure, it's better to share FontInfos among
// FontSets (Classify::fontinfo_table_).
struct FontSet {
  int32_t size;      // Number of entries in configs.
  int32_t* configs;  // FontInfo ids (heap array, freed by FontSetDeleteCallback).
};
|
||||
|
||||
// Class that adds a bit of functionality on top of GenericVector to
// implement a table of FontInfo that replaces UniCityTable<FontInfo>.
// TODO(rays) change all references once all existing traineddata files
// are replaced.
class FontInfoTable : public GenericVector<FontInfo> {
 public:
  FontInfoTable();
  ~FontInfoTable();

  // Writes to the given file. Returns false in case of error.
  bool Serialize(FILE* fp) const;
  // Reads from the given file. Returns false in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
  bool DeSerialize(bool swap, FILE* fp);

  // Returns true if the given set of fonts includes one with the same
  // properties as font_id.
  bool SetContainsFontProperties(
      int font_id, const GenericVector<ScoredFont>& font_set) const;
  // Returns true if the given set of fonts includes multiple properties.
  bool SetContainsMultipleFontProperties(
      const GenericVector<ScoredFont>& font_set) const;

  // Moves any non-empty FontSpacingInfo entries from other to this.
  void MoveSpacingInfoFrom(FontInfoTable* other);
  // Moves this to the target unicity table.
  void MoveTo(UnicityTable<FontInfo>* target);
};
|
||||
|
||||
// Compare FontInfo structures.
|
||||
bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2);
|
||||
// Compare FontSet structures.
|
||||
bool CompareFontSet(const FontSet& fs1, const FontSet& fs2);
|
||||
// Deletion callbacks for GenericVector.
|
||||
void FontInfoDeleteCallback(FontInfo f);
|
||||
void FontSetDeleteCallback(FontSet fs);
|
||||
|
||||
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
|
||||
bool read_info(FILE* f, FontInfo* fi, bool swap);
|
||||
bool write_info(FILE* f, const FontInfo& fi);
|
||||
bool read_spacing_info(FILE *f, FontInfo* fi, bool swap);
|
||||
bool write_spacing_info(FILE* f, const FontInfo& fi);
|
||||
bool read_set(FILE* f, FontSet* fs, bool swap);
|
||||
bool write_set(FILE* f, const FontSet& fs);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif  /* TESSERACT_CCSTRUCT_FONTINFO_H_ */
|
|
@ -0,0 +1,38 @@
|
|||
/**********************************************************************
|
||||
* File: genblob.cpp (Formerly gblob.c)
|
||||
* Description: Generic Blob processing routines
|
||||
* Author: Phil Cheatle
|
||||
* Created: Mon Nov 25 10:53:26 GMT 1991
|
||||
*
|
||||
* (C) Copyright 1991, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "genblob.h"
|
||||
#include "stepblob.h"
|
||||
|
||||
/**********************************************************************
|
||||
* c_blob_comparator()
|
||||
*
|
||||
* Blob comparator used to sort a blob list so that blobs are in increasing
|
||||
* order of left edge.
|
||||
**********************************************************************/
|
||||
|
||||
int c_blob_comparator( // sort blobs
|
||||
const void *blob1p, // ptr to ptr to blob1
|
||||
const void *blob2p // ptr to ptr to blob2
|
||||
) {
|
||||
C_BLOB *blob1 = *(C_BLOB **) blob1p;
|
||||
C_BLOB *blob2 = *(C_BLOB **) blob2p;
|
||||
|
||||
return blob1->bounding_box ().left () - blob2->bounding_box ().left ();
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
/**********************************************************************
|
||||
* File: genblob.h (Formerly gblob.h)
|
||||
* Description: Generic Blob processing routines
|
||||
* Author: Phil Cheatle
|
||||
* Created: Mon Nov 25 10:53:26 GMT 1991
|
||||
*
|
||||
* (C) Copyright 1991, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef GENBLOB_H
|
||||
#define GENBLOB_H
|
||||
|
||||
// Sort function to sort blobs by ascending left edge.
|
||||
int c_blob_comparator(const void *blob1p, // ptr to ptr to blob1
|
||||
const void *blob2p);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,17 @@
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#ifndef HPDSIZES_H
|
||||
#define HPDSIZES_H
|
||||
|
||||
#define NUM_TEXT_ATTR 10
|
||||
#define NUM_BLOCK_ATTR 7
|
||||
#define MAXLENGTH 128
|
||||
#define NUM_BACKGROUNDS 8
|
||||
#endif
|
|
@ -0,0 +1,699 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: imagedata.h
|
||||
// Description: Class to hold information about a single multi-page tiff
|
||||
// training file and its corresponding boxes or text file.
|
||||
// Author: Ray Smith
|
||||
// Created: Tue May 28 08:56:06 PST 2013
|
||||
//
|
||||
// (C) Copyright 2013, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "imagedata.h"
|
||||
|
||||
#include "allheaders.h"
|
||||
#include "boxread.h"
|
||||
#include "callcpp.h"
|
||||
#include "helpers.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
#if defined(__MINGW32__)
|
||||
# include <unistd.h>
|
||||
#elif __cplusplus > 199711L // in C++11
|
||||
# include <thread>
|
||||
#endif
|
||||
|
||||
// Number of documents to read ahead while training. Doesn't need to be very
|
||||
// large.
|
||||
const int kMaxReadAhead = 8;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Default constructor: a feature at the origin with direction code 0.
WordFeature::WordFeature() : x_(0), y_(0), dir_(0) {
}

// Builds a feature from a floating-point coordinate and a direction code.
// x is rounded to the nearest int; y is rounded and clipped to [0, MAX_UINT8].
WordFeature::WordFeature(const FCOORD& fcoord, uinT8 dir)
  : x_(IntCastRounded(fcoord.x())),
    y_(ClipToRange(IntCastRounded(fcoord.y()), 0, MAX_UINT8)),
    dir_(dir) {
}
|
||||
|
||||
// Computes the maximum x and y value in the features.
|
||||
void WordFeature::ComputeSize(const GenericVector<WordFeature>& features,
|
||||
int* max_x, int* max_y) {
|
||||
*max_x = 0;
|
||||
*max_y = 0;
|
||||
for (int f = 0; f < features.size(); ++f) {
|
||||
if (features[f].x_ > *max_x) *max_x = features[f].x_;
|
||||
if (features[f].y_ > *max_y) *max_y = features[f].y_;
|
||||
}
|
||||
}
|
||||
|
||||
// Draws the features in the given window: each feature is rendered as a
// short line segment centered on its position, oriented by its direction.
void WordFeature::Draw(const GenericVector<WordFeature>& features,
                       ScrollView* window) {
#ifndef GRAPHICS_DISABLED
  for (int f = 0; f < features.size(); ++f) {
    FCOORD pos(features[f].x_, features[f].y_);
    FCOORD dir;
    // Decode the direction byte into a unit vector, then scale to a
    // visible segment half-length of 8 pixels.
    dir.from_direction(features[f].dir_);
    dir *= 8.0f;
    window->SetCursor(IntCastRounded(pos.x() - dir.x()),
                      IntCastRounded(pos.y() - dir.y()));
    window->DrawTo(IntCastRounded(pos.x() + dir.x()),
                   IntCastRounded(pos.y() + dir.y()));
  }
#endif
}
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool WordFeature::Serialize(FILE* fp) const {
|
||||
if (fwrite(&x_, sizeof(x_), 1, fp) != 1) return false;
|
||||
if (fwrite(&y_, sizeof(y_), 1, fp) != 1) return false;
|
||||
if (fwrite(&dir_, sizeof(dir_), 1, fp) != 1) return false;
|
||||
return true;
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool WordFeature::DeSerialize(bool swap, FILE* fp) {
  if (fread(&x_, sizeof(x_), 1, fp) != 1) return false;
  if (swap) ReverseN(&x_, sizeof(x_));
  // NOTE(review): y_ and dir_ are not byte-swapped here. That is only
  // correct if both are single-byte fields -- confirm against the
  // declarations in imagedata.h.
  if (fread(&y_, sizeof(y_), 1, fp) != 1) return false;
  if (fread(&dir_, sizeof(dir_), 1, fp) != 1) return false;
  return true;
}
|
||||
|
||||
void FloatWordFeature::FromWordFeatures(
|
||||
const GenericVector<WordFeature>& word_features,
|
||||
GenericVector<FloatWordFeature>* float_features) {
|
||||
for (int i = 0; i < word_features.size(); ++i) {
|
||||
FloatWordFeature f;
|
||||
f.x = word_features[i].x();
|
||||
f.y = word_features[i].y();
|
||||
f.dir = word_features[i].dir();
|
||||
f.x_bucket = 0; // Will set it later.
|
||||
float_features->push_back(f);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort function to sort first by x-bucket, then by y.
|
||||
/* static */
|
||||
int FloatWordFeature::SortByXBucket(const void* v1, const void* v2) {
|
||||
const FloatWordFeature* f1 = reinterpret_cast<const FloatWordFeature*>(v1);
|
||||
const FloatWordFeature* f2 = reinterpret_cast<const FloatWordFeature*>(v2);
|
||||
int x_diff = f1->x_bucket - f2->x_bucket;
|
||||
if (x_diff == 0) return f1->y - f2->y;
|
||||
return x_diff;
|
||||
}
|
||||
|
||||
// Default constructor: page -1 marks "no page loaded".
ImageData::ImageData() : page_number_(-1), vertical_text_(false) {
}
// Takes ownership of the pix and destroys it.
ImageData::ImageData(bool vertical, Pix* pix)
  : page_number_(0), vertical_text_(vertical) {
  // SetPix encodes the pix into image_data_ and destroys it.
  SetPix(pix);
}
ImageData::~ImageData() {
}
|
||||
|
||||
// Builds and returns an ImageData from the basic data. Note that imagedata,
// truth_text, and box_text are all the actual file data, NOT filenames.
// Returns NULL (after printing an error) if there is neither box text nor
// truth text to supply a transcription. Caller owns the returned object.
ImageData* ImageData::Build(const char* name, int page_number, const char* lang,
                            const char* imagedata, int imagedatasize,
                            const char* truth_text, const char* box_text) {
  ImageData* image_data = new ImageData();
  image_data->imagefilename_ = name;
  image_data->page_number_ = page_number;
  image_data->language_ = lang;
  // Save the imagedata.
  image_data->image_data_.resize_no_init(imagedatasize);
  memcpy(&image_data->image_data_[0], imagedata, imagedatasize);
  if (!image_data->AddBoxes(box_text)) {
    // No usable boxes: fall back to the truth text, which must be present.
    if (truth_text == NULL || truth_text[0] == '\0') {
      tprintf("Error: No text corresponding to page %d from image %s!\n",
              page_number, name);
      delete image_data;
      return NULL;
    }
    image_data->transcription_ = truth_text;
    // If we have no boxes, the transcription is in the 0th box_texts_.
    image_data->box_texts_.push_back(truth_text);
    // We will create a box for the whole image on PreScale, to save unpacking
    // the image now.
  } else if (truth_text != NULL && truth_text[0] != '\0' &&
             image_data->transcription_ != truth_text) {
    // Save the truth text as it is present and disagrees with the box text.
    image_data->transcription_ = truth_text;
  }
  return image_data;
}
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
// Field order must match DeSerialize/SkipDeSerialize exactly.
bool ImageData::Serialize(TFile* fp) const {
  if (!imagefilename_.Serialize(fp)) return false;
  if (fp->FWrite(&page_number_, sizeof(page_number_), 1) != 1) return false;
  if (!image_data_.Serialize(fp)) return false;
  if (!transcription_.Serialize(fp)) return false;
  // WARNING: Will not work across different endian machines.
  if (!boxes_.Serialize(fp)) return false;
  if (!box_texts_.SerializeClasses(fp)) return false;
  // vertical_text_ is stored as a single byte.
  inT8 vertical = vertical_text_;
  if (fp->FWrite(&vertical, sizeof(vertical), 1) != 1) return false;
  return true;
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
// Field order must match Serialize exactly.
bool ImageData::DeSerialize(bool swap, TFile* fp) {
  if (!imagefilename_.DeSerialize(swap, fp)) return false;
  if (fp->FRead(&page_number_, sizeof(page_number_), 1) != 1) return false;
  if (swap) ReverseN(&page_number_, sizeof(page_number_));
  if (!image_data_.DeSerialize(swap, fp)) return false;
  if (!transcription_.DeSerialize(swap, fp)) return false;
  // WARNING: Will not work across different endian machines.
  if (!boxes_.DeSerialize(swap, fp)) return false;
  if (!box_texts_.DeSerializeClasses(swap, fp)) return false;
  // Single byte, so no swap needed.
  inT8 vertical = 0;
  if (fp->FRead(&vertical, sizeof(vertical), 1) != 1) return false;
  vertical_text_ = vertical != 0;
  return true;
}
|
||||
|
||||
// As DeSerialize, but only seeks past the data - hence a static method.
// The sequence of skips must mirror the field order in Serialize.
bool ImageData::SkipDeSerialize(bool swap, TFile* fp) {
  if (!STRING::SkipDeSerialize(swap, fp)) return false;
  inT32 page_number;
  if (fp->FRead(&page_number, sizeof(page_number), 1) != 1) return false;
  if (!GenericVector<char>::SkipDeSerialize(swap, fp)) return false;
  if (!STRING::SkipDeSerialize(swap, fp)) return false;
  if (!GenericVector<TBOX>::SkipDeSerialize(swap, fp)) return false;
  if (!GenericVector<STRING>::SkipDeSerializeClasses(swap, fp)) return false;
  inT8 vertical = 0;
  return fp->FRead(&vertical, sizeof(vertical), 1) == 1;
}
|
||||
|
||||
// Saves the given Pix as a PNG-encoded string and destroys it.
void ImageData::SetPix(Pix* pix) {
  SetPixInternal(pix, &image_data_);
}

// Returns the Pix image for *this. Must be pixDestroyed after use.
Pix* ImageData::GetPix() const {
  // Decodes the stored PNG bytes into a fresh Pix each call.
  return GetPixInternal(image_data_);
}
|
||||
|
||||
// Gets anything and everything with a non-NULL pointer, prescaled to a
// given target_height (if 0, then the original image height), and aligned.
// Also returns (if not NULL) the width and height of the scaled image.
// The return value is the scaled Pix, which must be pixDestroyed after use,
// and scale_factor (if not NULL) is set to the scale factor that was applied
// to the image to achieve the target_height.
Pix* ImageData::PreScale(int target_height, int max_height, float* scale_factor,
                         int* scaled_width, int* scaled_height,
                         GenericVector<TBOX>* boxes) const {
  int input_width = 0;
  int input_height = 0;
  Pix* src_pix = GetPix();
  ASSERT_HOST(src_pix != NULL);
  input_width = pixGetWidth(src_pix);
  input_height = pixGetHeight(src_pix);
  if (target_height == 0) {
    // No target: cap the original height at max_height.
    target_height = MIN(input_height, max_height);
  }
  float im_factor = static_cast<float>(target_height) / input_height;
  // Provisional estimates; overwritten with the actual scaled pix
  // dimensions below.
  if (scaled_width != NULL)
    *scaled_width = IntCastRounded(im_factor * input_width);
  if (scaled_height != NULL)
    *scaled_height = target_height;
  // Get the scaled image.
  Pix* pix = pixScale(src_pix, im_factor, im_factor);
  if (pix == NULL) {
    tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n",
            input_width, input_height, im_factor);
  }
  // NOTE(review): if pix is NULL the calls below rely on leptonica's
  // NULL-tolerant accessors returning 0 -- confirm before changing.
  if (scaled_width != NULL) *scaled_width = pixGetWidth(pix);
  if (scaled_height != NULL) *scaled_height = pixGetHeight(pix);
  pixDestroy(&src_pix);
  if (boxes != NULL) {
    // Get the boxes, scaled to match the resized image.
    boxes->truncate(0);
    for (int b = 0; b < boxes_.size(); ++b) {
      TBOX box = boxes_[b];
      box.scale(im_factor);
      boxes->push_back(box);
    }
    if (boxes->empty()) {
      // Make a single box for the whole image.
      TBOX box(0, 0, im_factor * input_width, target_height);
      boxes->push_back(box);
    }
  }
  if (scale_factor != NULL) *scale_factor = im_factor;
  return pix;
}
|
||||
|
||||
// Returns the number of bytes of (PNG-compressed) image data held.
int ImageData::MemoryUsed() const {
  return image_data_.size();
}
|
||||
|
||||
// Draws the data in a new window: the image, its boxes (with their texts),
// or the full transcription when there are no boxes. No-op when graphics
// are disabled at compile time.
void ImageData::Display() const {
#ifndef GRAPHICS_DISABLED
  const int kTextSize = 64;
  // Draw the image.
  Pix* pix = GetPix();
  if (pix == NULL) return;
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  // Window is sized with margins for the text annotations below the image.
  ScrollView* win = new ScrollView("Imagedata", 100, 100,
                                   2 * (width + 2 * kTextSize),
                                   2 * (height + 4 * kTextSize),
                                   width + 10, height + 3 * kTextSize, true);
  win->Image(pix, 0, height - 1);
  pixDestroy(&pix);
  // Draw the boxes.
  win->Pen(ScrollView::RED);
  win->Brush(ScrollView::NONE);
  // Shrink the label text so it fits small boxes.
  int text_size = kTextSize;
  if (!boxes_.empty() && boxes_[0].height() * 2 < text_size)
    text_size = boxes_[0].height() * 2;
  win->TextAttributes("Arial", text_size, false, false, false);
  if (!boxes_.empty()) {
    for (int b = 0; b < boxes_.size(); ++b) {
      boxes_[b].plot(win);
      win->Text(boxes_[b].left(), height + kTextSize, box_texts_[b].string());
    }
  } else {
    // The full transcription.
    win->Pen(ScrollView::CYAN);
    win->Text(0, height + kTextSize * 2, transcription_.string());
  }
  win->Update();
  // Block until the user closes the window.
  window_wait(win);
#endif
}
|
||||
|
||||
// Adds the supplied boxes and transcriptions that correspond to the correct
|
||||
// page number.
|
||||
void ImageData::AddBoxes(const GenericVector<TBOX>& boxes,
|
||||
const GenericVector<STRING>& texts,
|
||||
const GenericVector<int>& box_pages) {
|
||||
// Copy the boxes and make the transcription.
|
||||
for (int i = 0; i < box_pages.size(); ++i) {
|
||||
if (page_number_ >= 0 && box_pages[i] != page_number_) continue;
|
||||
transcription_ += texts[i];
|
||||
boxes_.push_back(boxes[i]);
|
||||
box_texts_.push_back(texts[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Saves the given Pix as a PNG-encoded string and destroys it.
|
||||
void ImageData::SetPixInternal(Pix* pix, GenericVector<char>* image_data) {
|
||||
l_uint8* data;
|
||||
size_t size;
|
||||
pixWriteMem(&data, &size, pix, IFF_PNG);
|
||||
pixDestroy(&pix);
|
||||
image_data->resize_no_init(size);
|
||||
memcpy(&(*image_data)[0], data, size);
|
||||
free(data);
|
||||
}
|
||||
|
||||
// Returns the Pix image for the image_data. Must be pixDestroyed after use.
|
||||
Pix* ImageData::GetPixInternal(const GenericVector<char>& image_data) {
|
||||
Pix* pix = NULL;
|
||||
if (!image_data.empty()) {
|
||||
// Convert the array to an image.
|
||||
const unsigned char* u_data =
|
||||
reinterpret_cast<const unsigned char*>(&image_data[0]);
|
||||
pix = pixReadMem(u_data, image_data.size());
|
||||
}
|
||||
return pix;
|
||||
}
|
||||
|
||||
// Parses the text string as a box file and adds any discovered boxes that
|
||||
// match the page number. Returns false on error.
|
||||
bool ImageData::AddBoxes(const char* box_text) {
|
||||
if (box_text != NULL && box_text[0] != '\0') {
|
||||
GenericVector<TBOX> boxes;
|
||||
GenericVector<STRING> texts;
|
||||
GenericVector<int> box_pages;
|
||||
if (ReadMemBoxes(page_number_, false, box_text, &boxes,
|
||||
&texts, NULL, &box_pages)) {
|
||||
AddBoxes(boxes, texts, box_pages);
|
||||
return true;
|
||||
} else {
|
||||
tprintf("Error: No boxes for page %d from image %s!\n",
|
||||
page_number_, imagefilename_.string());
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Thread function to call ReCachePages.
|
||||
void* ReCachePagesFunc(void* data) {
|
||||
DocumentData* document_data = reinterpret_cast<DocumentData*>(data);
|
||||
document_data->ReCachePages();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Constructs a named but unloaded document: the cache fields are set to
// their "uncached" sentinels (-1 offsets/totals) and no reader is attached
// until SetDocument/LoadDocument is called.
DocumentData::DocumentData(const STRING& name)
    : document_name_(name),
      pages_offset_(-1),
      total_pages_(-1),
      memory_used_(0),
      max_memory_(0),
      reader_(NULL) {}
|
||||
|
||||
// Destructor. Acquiring both mutexes before destruction appears intended to
// wait out any in-flight background load that still holds them —
// NOTE(review): confirm no new loads can be scheduled at this point.
DocumentData::~DocumentData() {
  SVAutoLock lock_p(&pages_mutex_);
  SVAutoLock lock_g(&general_mutex_);
}
|
||||
|
||||
// Reads all the pages in the given lstmf filename to the cache. The reader
// is used to read the file. Returns false if no page could be cached.
bool DocumentData::LoadDocument(const char* filename, const char* lang,
                                int start_page, inT64 max_memory,
                                FileReader reader) {
  // Record the metadata, position the cache at start_page, then fill it.
  SetDocument(filename, lang, max_memory, reader);
  pages_offset_ = start_page;
  return ReCachePages();
}
|
||||
|
||||
// Sets up the document metadata (name, language, memory budget, reader)
// without actually loading any pages; pages_offset_ is reset to the
// "uncached" sentinel. Both mutexes are taken, pages first then general —
// keep this order consistent with other methods to avoid deadlock.
void DocumentData::SetDocument(const char* filename, const char* lang,
                               inT64 max_memory, FileReader reader) {
  SVAutoLock lock_p(&pages_mutex_);
  SVAutoLock lock(&general_mutex_);
  document_name_ = filename;
  lang_ = lang;
  pages_offset_ = -1;
  max_memory_ = max_memory;
  reader_ = reader;
}
|
||||
|
||||
// Writes all the pages to the given filename. Returns false on error.
|
||||
bool DocumentData::SaveDocument(const char* filename, FileWriter writer) {
|
||||
SVAutoLock lock(&pages_mutex_);
|
||||
TFile fp;
|
||||
fp.OpenWrite(NULL);
|
||||
if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) {
|
||||
tprintf("Serialize failed: %s\n", filename);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Serializes all cached pages into the given in-memory buffer instead of a
// file. Returns false on serialization error.
bool DocumentData::SaveToBuffer(GenericVector<char>* buffer) {
  SVAutoLock lock(&pages_mutex_);
  TFile fp;
  fp.OpenWrite(buffer);
  return pages_.Serialize(&fp);
}
|
||||
|
||||
// Adds the given page data to this document, counting up memory.
|
||||
void DocumentData::AddPageToDocument(ImageData* page) {
|
||||
SVAutoLock lock(&pages_mutex_);
|
||||
pages_.push_back(page);
|
||||
set_memory_used(memory_used() + page->MemoryUsed());
|
||||
}
|
||||
|
||||
// If the given index is not currently loaded, loads it using a separate
|
||||
// thread.
|
||||
void DocumentData::LoadPageInBackground(int index) {
|
||||
ImageData* page = NULL;
|
||||
if (IsPageAvailable(index, &page)) return;
|
||||
SVAutoLock lock(&pages_mutex_);
|
||||
if (pages_offset_ == index) return;
|
||||
pages_offset_ = index;
|
||||
pages_.clear();
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
SVSync::StartThread(ReCachePagesFunc, this);
|
||||
#endif // GRAPHICS_DISABLED
|
||||
}
|
||||
|
||||
// Returns a pointer to the page with the given index, modulo the total
// number of pages. Blocks (polling once per second) until the background
// load is completed. NOTE(review): this is a busy-wait with 1s granularity;
// a condition variable would wake sooner, but the polling matches the
// coarse-grained loading here.
const ImageData* DocumentData::GetPage(int index) {
  ImageData* page = NULL;
  while (!IsPageAvailable(index, &page)) {
    // If there is no background load scheduled, schedule one now.
    pages_mutex_.Lock();
    bool needs_loading = pages_offset_ != index;
    pages_mutex_.Unlock();
    if (needs_loading) LoadPageInBackground(index);
    // We can't directly load the page, or the background load will delete it
    // while the caller is using it, so give it a chance to work.
#if __cplusplus > 199711L && !defined(__MINGW32__)
    // C++11: portable sleep.
    std::this_thread::sleep_for(std::chrono::seconds(1));
#elif _WIN32  // MSVS
    Sleep(1000);
#else
    sleep(1);
#endif
  }
  return page;
}
|
||||
|
||||
// Returns true if the requested page is available, and provides a pointer,
// which may be NULL if the document is empty. May block on pages_mutex_
// (held by an in-progress background load), even though it doesn't
// guarantee to return true.
bool DocumentData::IsPageAvailable(int index, ImageData** page) {
  SVAutoLock lock(&pages_mutex_);
  int num_pages = NumPages();
  if (num_pages == 0 || index < 0) {
    *page = NULL;  // Empty Document.
    return true;
  }
  if (num_pages > 0) {
    // Wrap the index into the document, then check whether it falls inside
    // the currently cached window [pages_offset_, pages_offset_+pages_.size()).
    index = Modulo(index, num_pages);
    if (pages_offset_ <= index && index < pages_offset_ + pages_.size()) {
      *page = pages_[index - pages_offset_];  // Page is available already.
      return true;
    }
  }
  // Page exists but is not in the cached window: caller must (re)load.
  return false;
}
|
||||
|
||||
// Removes all pages from memory and frees the memory, but does not forget
|
||||
// the document metadata.
|
||||
inT64 DocumentData::UnCache() {
|
||||
SVAutoLock lock(&pages_mutex_);
|
||||
inT64 memory_saved = memory_used();
|
||||
pages_.clear();
|
||||
pages_offset_ = -1;
|
||||
set_total_pages(-1);
|
||||
set_memory_used(0);
|
||||
tprintf("Unloaded document %s, saving %d memory\n", document_name_.string(),
|
||||
memory_saved);
|
||||
return memory_saved;
|
||||
}
|
||||
|
||||
// Locks the pages_mutex_ and loads as many pages as can fit in max_memory_
// starting at index pages_offset_. Pages before the window are skipped
// cheaply; pages after the memory budget is exhausted are skipped too.
// Returns false if the header could not be read or nothing was loaded.
bool DocumentData::ReCachePages() {
  SVAutoLock lock(&pages_mutex_);
  // Read the file.
  set_total_pages(0);
  set_memory_used(0);
  int loaded_pages = 0;
  pages_.truncate(0);
  TFile fp;
  if (!fp.Open(document_name_, reader_) ||
      !PointerVector<ImageData>::DeSerializeSize(false, &fp, &loaded_pages) ||
      loaded_pages <= 0) {
    tprintf("Deserialize header failed: %s\n", document_name_.string());
    return false;
  }
  // Wrap the requested start page into the document's actual page count.
  pages_offset_ %= loaded_pages;
  // Skip pages before the first one we want, and load the rest until max
  // memory and skip the rest after that.
  int page;
  for (page = 0; page < loaded_pages; ++page) {
    if (page < pages_offset_ ||
        (max_memory_ > 0 && memory_used() > max_memory_)) {
      // Outside the window or over budget: seek past without materializing.
      if (!PointerVector<ImageData>::DeSerializeSkip(false, &fp)) break;
    } else {
      if (!pages_.DeSerializeElement(false, &fp)) break;
      ImageData* image_data = pages_.back();
      // Backfill identifying metadata for pages that lack it.
      if (image_data->imagefilename().length() == 0) {
        image_data->set_imagefilename(document_name_);
        image_data->set_page_number(page);
      }
      image_data->set_language(lang_);
      set_memory_used(memory_used() + image_data->MemoryUsed());
    }
  }
  if (page < loaded_pages) {
    // Partial read is treated as total failure: discard everything.
    tprintf("Deserialize failed: %s read %d/%d pages\n",
            document_name_.string(), page, loaded_pages);
    pages_.truncate(0);
  } else {
    tprintf("Loaded %d/%d pages (%d-%d) of document %s\n", pages_.size(),
            loaded_pages, pages_offset_, pages_offset_ + pages_.size(),
            document_name_.string());
  }
  set_total_pages(loaded_pages);
  return !pages_.empty();
}
|
||||
|
||||
// A collection of DocumentData that knows roughly how much memory it is using.
|
||||
DocumentCache::DocumentCache(inT64 max_memory)
|
||||
: num_pages_per_doc_(0), max_memory_(max_memory) {}
|
||||
DocumentCache::~DocumentCache() {}
|
||||
|
||||
// Adds all the documents in the list of filenames, counting memory.
|
||||
// The reader is used to read the files.
|
||||
bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames,
|
||||
const char* lang,
|
||||
CachingStrategy cache_strategy,
|
||||
FileReader reader) {
|
||||
cache_strategy_ = cache_strategy;
|
||||
inT64 fair_share_memory = 0;
|
||||
// In the round-robin case, each DocumentData handles restricting its content
|
||||
// to its fair share of memory. In the sequential case, DocumentCache
|
||||
// determines which DocumentDatas are held entirely in memory.
|
||||
if (cache_strategy_ == CS_ROUND_ROBIN)
|
||||
fair_share_memory = max_memory_ / filenames.size();
|
||||
for (int arg = 0; arg < filenames.size(); ++arg) {
|
||||
STRING filename = filenames[arg];
|
||||
DocumentData* document = new DocumentData(filename);
|
||||
document->SetDocument(filename.string(), lang, fair_share_memory, reader);
|
||||
AddToCache(document);
|
||||
}
|
||||
if (!documents_.empty()) {
|
||||
// Try to get the first page now to verify the list of filenames.
|
||||
if (GetPageBySerial(0) != NULL) return true;
|
||||
tprintf("Load of page 0 failed!\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Adds document to the cache.
|
||||
bool DocumentCache::AddToCache(DocumentData* data) {
|
||||
inT64 new_memory = data->memory_used();
|
||||
documents_.push_back(data);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Finds and returns a document by name.
|
||||
DocumentData* DocumentCache::FindDocument(const STRING& document_name) const {
|
||||
for (int i = 0; i < documents_.size(); ++i) {
|
||||
if (documents_[i]->document_name() == document_name)
|
||||
return documents_[i];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
|
||||
// strategy, could take a long time.
|
||||
int DocumentCache::TotalPages() {
|
||||
if (cache_strategy_ == CS_SEQUENTIAL) {
|
||||
// In sequential mode, we assume each doc has the same number of pages
|
||||
// whether it is true or not.
|
||||
if (num_pages_per_doc_ == 0) GetPageSequential(0);
|
||||
return num_pages_per_doc_ * documents_.size();
|
||||
}
|
||||
int total_pages = 0;
|
||||
int num_docs = documents_.size();
|
||||
for (int d = 0; d < num_docs; ++d) {
|
||||
// We have to load a page to make NumPages() valid.
|
||||
documents_[d]->GetPage(0);
|
||||
total_pages += documents_[d]->NumPages();
|
||||
}
|
||||
return total_pages;
|
||||
}
|
||||
|
||||
// Returns a page by serial number, selecting them in a round-robin fashion
|
||||
// from all the documents. Highly disk-intensive, but doesn't need samples
|
||||
// to be shuffled between files to begin with.
|
||||
const ImageData* DocumentCache::GetPageRoundRobin(int serial) {
|
||||
int num_docs = documents_.size();
|
||||
int doc_index = serial % num_docs;
|
||||
const ImageData* doc = documents_[doc_index]->GetPage(serial / num_docs);
|
||||
for (int offset = 1; offset <= kMaxReadAhead && offset < num_docs; ++offset) {
|
||||
doc_index = (serial + offset) % num_docs;
|
||||
int page = (serial + offset) / num_docs;
|
||||
documents_[doc_index]->LoadPageInBackground(page);
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
// Returns a page by serial number, selecting them in sequence from each file.
// Requires the samples to be shuffled between the files to give a random or
// uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
// Also enforces max_memory_ by un-caching documents around the current one,
// and prefetches the next document.
const ImageData* DocumentCache::GetPageSequential(int serial) {
  int num_docs = documents_.size();
  ASSERT_HOST(num_docs > 0);
  if (num_pages_per_doc_ == 0) {
    // Use the pages in the first doc as the number of pages in each doc.
    documents_[0]->GetPage(0);
    num_pages_per_doc_ = documents_[0]->NumPages();
    if (num_pages_per_doc_ == 0) {
      tprintf("First document cannot be empty!!\n");
      ASSERT_HOST(num_pages_per_doc_ > 0);
    }
    // Get rid of zero now if we don't need it.
    if (serial / num_pages_per_doc_ % num_docs > 0) documents_[0]->UnCache();
  }
  // Map serial -> (document, page within document).
  int doc_index = serial / num_pages_per_doc_ % num_docs;
  const ImageData* doc =
      documents_[doc_index]->GetPage(serial % num_pages_per_doc_);
  // Count up total memory. Background loading makes it more complicated to
  // keep a running count.
  inT64 total_memory = 0;
  for (int d = 0; d < num_docs; ++d) {
    total_memory += documents_[d]->memory_used();
  }
  if (total_memory >= max_memory_) {
    // Find something to un-cache.
    // If there are more than 3 in front, then serial is from the back reader
    // of a pair of readers. If we un-cache from in-front-2 to 2-ahead, then
    // we create a hole between them and then un-caching the backmost occupied
    // will work for both.
    int num_in_front = CountNeighbourDocs(doc_index, 1);
    for (int offset = num_in_front - 2;
         offset > 1 && total_memory >= max_memory_; --offset) {
      int next_index = (doc_index + offset) % num_docs;
      total_memory -= documents_[next_index]->UnCache();
    }
    // If that didn't work, the best solution is to un-cache from the back. If
    // we take away the document that a 2nd reader is using, it will put it
    // back and make a hole between.
    int num_behind = CountNeighbourDocs(doc_index, -1);
    for (int offset = num_behind; offset < 0 && total_memory >= max_memory_;
         ++offset) {
      int next_index = (doc_index + offset + num_docs) % num_docs;
      total_memory -= documents_[next_index]->UnCache();
    }
  }
  // Read ahead: start loading the next document if there is budget for it.
  int next_index = (doc_index + 1) % num_docs;
  if (!documents_[next_index]->IsCached() && total_memory < max_memory_) {
    documents_[next_index]->LoadPageInBackground(0);
  }
  return doc;
}
|
||||
|
||||
// Helper counts the number of adjacent cached neighbours of index looking in
|
||||
// direction dir, ie index+dir, index+2*dir etc.
|
||||
int DocumentCache::CountNeighbourDocs(int index, int dir) {
|
||||
int num_docs = documents_.size();
|
||||
for (int offset = dir; abs(offset) < num_docs; offset += dir) {
|
||||
int offset_index = (index + offset + num_docs) % num_docs;
|
||||
if (!documents_[offset_index]->IsCached()) return offset - dir;
|
||||
}
|
||||
return num_docs;
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
|
@ -0,0 +1,379 @@
|
|||
///////////////////////////////////////////////////////////////////////
|
||||
// File: imagedata.h
|
||||
// Description: Class to hold information about a single image and its
|
||||
// corresponding boxes or text file.
|
||||
// Author: Ray Smith
|
||||
// Created: Mon Jul 22 14:17:06 PDT 2013
|
||||
//
|
||||
// (C) Copyright 2013, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_IMAGE_IMAGEDATA_H_
|
||||
#define TESSERACT_IMAGE_IMAGEDATA_H_
|
||||
|
||||
|
||||
#include "genericvector.h"
|
||||
#include "normalis.h"
|
||||
#include "rect.h"
|
||||
#include "strngs.h"
|
||||
#include "svutil.h"
|
||||
|
||||
struct Pix;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Amount of padding to apply in output pixels in feature mode.
const int kFeaturePadding = 2;
// Number of pixels to pad around text boxes.
const int kImagePadding = 4;

// Enum to determine the caching and data sequencing strategy used by
// DocumentCache when serving training samples.
enum CachingStrategy {
  // Reads all of one file before moving on to the next. Requires samples to be
  // shuffled across files. Uses the count of samples in the first file as
  // the count in all the files to achieve high-speed random access. As a
  // consequence, if subsequent files are smaller, they get entries used more
  // than once, and if subsequent files are larger, some entries are not used.
  // Best for larger data sets that don't fit in memory.
  CS_SEQUENTIAL,
  // Reads one sample from each file in rotation. Does not require shuffled
  // samples, but is extremely disk-intensive. Samples in smaller files also
  // get used more often than samples in larger files.
  // Best for smaller data sets that mostly fit in memory.
  CS_ROUND_ROBIN,
};
|
||||
|
||||
// A spatial feature of a word image: an (x, y) position plus a quantized
// direction. Stored compactly for serialization to training data files.
class WordFeature {
 public:
  WordFeature();
  WordFeature(const FCOORD& fcoord, uinT8 dir);

  // Computes the maximum x and y value in the features.
  static void ComputeSize(const GenericVector<WordFeature>& features,
                          int* max_x, int* max_y);
  // Draws the features in the given window.
  static void Draw(const GenericVector<WordFeature>& features,
                   ScrollView* window);

  // Accessors.
  int x() const { return x_; }
  int y() const { return y_; }
  int dir() const { return dir_; }

  // Writes to the given file. Returns false in case of error.
  bool Serialize(FILE* fp) const;
  // Reads from the given file. Returns false in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
  bool DeSerialize(bool swap, FILE* fp);

 private:
  // NOTE(review): x_ is 16-bit but y_ only 8-bit — presumably y is bounded
  // by a normalized line height; confirm against the serialization format
  // before widening.
  inT16 x_;
  uinT8 y_;
  uinT8 dir_;
};
|
||||
|
||||
// A floating-point version of WordFeature, used as an intermediate during
// scaling, with an extra x_bucket field used as the primary sort key.
struct FloatWordFeature {
  // Converts integer WordFeatures to their float equivalents.
  static void FromWordFeatures(const GenericVector<WordFeature>& word_features,
                               GenericVector<FloatWordFeature>* float_features);
  // Sort function to sort first by x-bucket, then by y; qsort-compatible.
  static int SortByXBucket(const void*, const void*);

  float x;
  float y;
  float dir;
  // Quantized x used as the coarse sort key.
  int x_bucket;
};
|
||||
|
||||
// Class to hold information on a single image:
// Filename, cached image as a Pix*, character boxes, text transcription.
// The text transcription is the ground truth UTF-8 text for the image.
// Character boxes are optional and indicate the desired segmentation of
// the text into recognition units.
// The image itself is stored PNG-compressed (image_data_) and decoded on
// demand by GetPix().
class ImageData {
 public:
  ImageData();
  // Takes ownership of the pix.
  ImageData(bool vertical, Pix* pix);
  ~ImageData();

  // Builds and returns an ImageData from the basic data. Note that imagedata,
  // truth_text, and box_text are all the actual file data, NOT filenames.
  static ImageData* Build(const char* name, int page_number, const char* lang,
                          const char* imagedata, int imagedatasize,
                          const char* truth_text, const char* box_text);

  // Writes to the given file. Returns false in case of error.
  bool Serialize(TFile* fp) const;
  // Reads from the given file. Returns false in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
  bool DeSerialize(bool swap, TFile* fp);
  // As DeSerialize, but only seeks past the data - hence a static method.
  static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);

  // Other accessors.
  const STRING& imagefilename() const {
    return imagefilename_;
  }
  void set_imagefilename(const STRING& name) {
    imagefilename_ = name;
  }
  int page_number() const {
    return page_number_;
  }
  void set_page_number(int num) {
    page_number_ = num;
  }
  const GenericVector<char>& image_data() const {
    return image_data_;
  }
  const STRING& language() const {
    return language_;
  }
  void set_language(const STRING& lang) {
    language_ = lang;
  }
  const STRING& transcription() const {
    return transcription_;
  }
  const GenericVector<TBOX>& boxes() const {
    return boxes_;
  }
  const GenericVector<STRING>& box_texts() const {
    return box_texts_;
  }
  const STRING& box_text(int index) const {
    return box_texts_[index];
  }
  // Saves the given Pix as a PNG-encoded string and destroys it.
  void SetPix(Pix* pix);
  // Returns the Pix image for *this. Must be pixDestroyed after use.
  Pix* GetPix() const;
  // Gets anything and everything with a non-NULL pointer, prescaled to a
  // given target_height (if 0, then the original image height), and aligned.
  // Also returns (if not NULL) the width and height of the scaled image.
  // The return value is the scaled Pix, which must be pixDestroyed after use,
  // and scale_factor (if not NULL) is set to the scale factor that was applied
  // to the image to achieve the target_height.
  Pix* PreScale(int target_height, int max_height, float* scale_factor,
                int* scaled_width, int* scaled_height,
                GenericVector<TBOX>* boxes) const;

  // Returns the size of the compressed image data in bytes.
  int MemoryUsed() const;

  // Draws the data in a new window.
  void Display() const;

  // Adds the supplied boxes and transcriptions that correspond to the correct
  // page number.
  void AddBoxes(const GenericVector<TBOX>& boxes,
                const GenericVector<STRING>& texts,
                const GenericVector<int>& box_pages);

 private:
  // Saves the given Pix as a PNG-encoded string and destroys it.
  static void SetPixInternal(Pix* pix, GenericVector<char>* image_data);
  // Returns the Pix image for the image_data. Must be pixDestroyed after use.
  static Pix* GetPixInternal(const GenericVector<char>& image_data);
  // Parses the text string as a box file and adds any discovered boxes that
  // match the page number. Returns false on error.
  bool AddBoxes(const char* box_text);

 private:
  STRING imagefilename_;             // File to read image from.
  inT32 page_number_;                // Page number if multi-page tif or -1.
  GenericVector<char> image_data_;   // PNG file data.
  STRING language_;                  // Language code for image.
  STRING transcription_;             // UTF-8 ground truth of image.
  GenericVector<TBOX> boxes_;        // If non-empty boxes of the image.
  GenericVector<STRING> box_texts_;  // String for text in each box.
  bool vertical_text_;               // Image has been rotated from vertical.
};
|
||||
|
||||
// A collection of ImageData that knows roughly how much memory it is using.
// Thread-safety: pages_mutex_ guards the page cache; general_mutex_ guards
// the cheap metadata accessors so they never block on a page load.
class DocumentData {
  friend void* ReCachePagesFunc(void* data);

 public:
  explicit DocumentData(const STRING& name);
  ~DocumentData();

  // Reads all the pages in the given lstmf filename to the cache. The reader
  // is used to read the file.
  bool LoadDocument(const char* filename, const char* lang, int start_page,
                    inT64 max_memory, FileReader reader);
  // Sets up the document, without actually loading it.
  void SetDocument(const char* filename, const char* lang, inT64 max_memory,
                   FileReader reader);
  // Writes all the pages to the given filename. Returns false on error.
  bool SaveDocument(const char* filename, FileWriter writer);
  bool SaveToBuffer(GenericVector<char>* buffer);

  // Adds the given page data to this document, counting up memory.
  void AddPageToDocument(ImageData* page);

  // Accessors guarded by general_mutex_ so they stay responsive during loads.
  const STRING& document_name() const {
    SVAutoLock lock(&general_mutex_);
    return document_name_;
  }
  int NumPages() const {
    SVAutoLock lock(&general_mutex_);
    return total_pages_;
  }
  inT64 memory_used() const {
    SVAutoLock lock(&general_mutex_);
    return memory_used_;
  }
  // If the given index is not currently loaded, loads it using a separate
  // thread. Note: there are 4 cases:
  // Document uncached: IsCached() returns false, total_pages_ < 0.
  // Required page is available: IsPageAvailable returns true. In this case,
  // total_pages_ > 0 and
  // pages_offset_ <= index%total_pages_ <= pages_offset_+pages_.size()
  // Pages are loaded, but the required one is not.
  // The requested page is being loaded by LoadPageInBackground. In this case,
  // index == pages_offset_. Once the loading starts, the pages lock is held
  // until it completes, at which point IsPageAvailable will unblock and return
  // true.
  void LoadPageInBackground(int index);
  // Returns a pointer to the page with the given index, modulo the total
  // number of pages. Blocks until the background load is completed.
  const ImageData* GetPage(int index);
  // Returns true if the requested page is available, and provides a pointer,
  // which may be NULL if the document is empty. May block, even though it
  // doesn't guarantee to return true.
  bool IsPageAvailable(int index, ImageData** page);
  // Takes ownership of the given page index. The page is made NULL in *this.
  ImageData* TakePage(int index) {
    SVAutoLock lock(&pages_mutex_);
    ImageData* page = pages_[index];
    pages_[index] = NULL;
    return page;
  }
  // Returns true if the document is currently loaded or in the process of
  // loading. (total_pages_ is -1 only when uncached.)
  bool IsCached() const { return NumPages() >= 0; }
  // Removes all pages from memory and frees the memory, but does not forget
  // the document metadata. Returns the memory saved.
  inT64 UnCache();

 private:
  // Sets the value of total_pages_ behind a mutex.
  void set_total_pages(int total) {
    SVAutoLock lock(&general_mutex_);
    total_pages_ = total;
  }
  // Sets the value of memory_used_ behind a mutex.
  void set_memory_used(inT64 memory_used) {
    SVAutoLock lock(&general_mutex_);
    memory_used_ = memory_used;
  }
  // Locks the pages_mutex_ and Loads as many pages can fit in max_memory_
  // starting at index pages_offset_.
  bool ReCachePages();

 private:
  // A name for this document.
  STRING document_name_;
  // The language of this document.
  STRING lang_;
  // A group of pages that corresponds in some loose way to a document.
  PointerVector<ImageData> pages_;
  // Page number of the first index in pages_.
  int pages_offset_;
  // Total number of pages in document (may exceed size of pages_.)
  int total_pages_;
  // Total of all pix sizes in the document.
  inT64 memory_used_;
  // Max memory to use at any time.
  inT64 max_memory_;
  // Saved reader from LoadDocument to allow re-caching.
  FileReader reader_;
  // Mutex that protects pages_ and pages_offset_ against multiple parallel
  // loads, and provides a wait for page.
  SVMutex pages_mutex_;
  // Mutex that protects other data members that callers want to access without
  // waiting for a load operation.
  mutable SVMutex general_mutex_;
};
|
||||
|
||||
// A collection of DocumentData that knows roughly how much memory it is using.
// Note that while it supports background read-ahead, it assumes that a single
// thread is accessing documents, ie it is not safe for multiple threads to
// access different documents in parallel, as one may de-cache the other's
// content.
class DocumentCache {
 public:
  explicit DocumentCache(inT64 max_memory);
  ~DocumentCache();

  // Deletes all existing documents from the cache.
  void Clear() {
    documents_.clear();
    num_pages_per_doc_ = 0;
  }
  // Adds all the documents in the list of filenames, counting memory.
  // The reader is used to read the files.
  bool LoadDocuments(const GenericVector<STRING>& filenames, const char* lang,
                     CachingStrategy cache_strategy, FileReader reader);

  // Adds document to the cache (taking ownership).
  bool AddToCache(DocumentData* data);

  // Finds and returns a document by name.
  DocumentData* FindDocument(const STRING& document_name) const;

  // Returns a page by serial number using the current cache_strategy_ to
  // determine the mapping from serial number to page.
  const ImageData* GetPageBySerial(int serial) {
    if (cache_strategy_ == CS_SEQUENTIAL)
      return GetPageSequential(serial);
    else
      return GetPageRoundRobin(serial);
  }

  const PointerVector<DocumentData>& documents() const {
    return documents_;
  }
  // Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
  // strategy, could take a long time.
  int TotalPages();

 private:
  // Returns a page by serial number, selecting them in a round-robin fashion
  // from all the documents. Highly disk-intensive, but doesn't need samples
  // to be shuffled between files to begin with.
  const ImageData* GetPageRoundRobin(int serial);
  // Returns a page by serial number, selecting them in sequence from each file.
  // Requires the samples to be shuffled between the files to give a random or
  // uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
  const ImageData* GetPageSequential(int serial);

  // Helper counts the number of adjacent cached neighbour documents_ of index
  // looking in direction dir, ie index+dir, index+2*dir etc.
  int CountNeighbourDocs(int index, int dir);

  // A group of pages that corresponds in some loose way to a document.
  PointerVector<DocumentData> documents_;
  // Strategy to use for caching and serializing data samples.
  CachingStrategy cache_strategy_;
  // Number of pages in the first document, used as a divisor in
  // GetPageSequential to determine the document index.
  int num_pages_per_doc_;
  // Max memory allowed in this cache.
  inT64 max_memory_;
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
|
||||
#endif // TESSERACT_IMAGE_IMAGEDATA_H_
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue