initialize

gb 2022-05-03 11:56:07 +08:00
commit d1907a5191
960 changed files with 591304 additions and 0 deletions

build.sh Normal file

@ -0,0 +1,33 @@
cd ../
mkdir build
cd build
mkdir imgpc
cd imgpc
rm -rf *
cmake ../../device/hgdriver/ImageProcess
make
cd ..
mkdir hgdev
cd hgdev
rm -rf *
cmake ../../device/hgdriver/hgdev
make
cd ..
mkdir wrapper
cd wrapper
rm -rf *
cmake ../../device/hgdriver/huagaoxxx_warraper_ex
make
cd ..
mkdir hgsane
cd hgsane
rm -rf *
cmake ../../device/hgsane
make
sudo cp ../../release/Linux/x86_64/libsane-hgsane.so /usr/lib/x86_64-linux-gnu/sane/libsane-hgsane.so.1

hgdriver/3rdparty/cyusb/inc/CyAPI.h vendored Normal file

@ -0,0 +1,462 @@
//______________________________________________________________________________
//
// Copyright (c) Cypress Semiconductor, 2003
// All rights reserved.
//
//______________________________________________________________________________
#ifndef CyUSBH
#define CyUSBH
#ifndef __USB200_H__
#define __USB200_H__
#include <Windows.h>
#pragma pack(push,1)
typedef struct _USB_DEVICE_DESCRIPTOR { // device descriptor
UCHAR bLength; // descriptor length
UCHAR bDescriptorType; // descriptor type
USHORT bcdUSB; // USB specification release (BCD)
UCHAR bDeviceClass; // device class
UCHAR bDeviceSubClass; // device subclass
UCHAR bDeviceProtocol; // device protocol
UCHAR bMaxPacketSize0; // maximum packet size for endpoint 0
USHORT idVendor; // vendor ID
USHORT idProduct; // product ID
USHORT bcdDevice; // device release number (BCD)
UCHAR iManufacturer; // manufacturer string index
UCHAR iProduct; // product string index
UCHAR iSerialNumber; // serial number string index
UCHAR bNumConfigurations; // number of configurations
} USB_DEVICE_DESCRIPTOR, *PUSB_DEVICE_DESCRIPTOR;
typedef struct _USB_ENDPOINT_DESCRIPTOR { // endpoint descriptor
UCHAR bLength; // descriptor length
UCHAR bDescriptorType; // descriptor type
UCHAR bEndpointAddress; // endpoint address
UCHAR bmAttributes; // endpoint attributes
USHORT wMaxPacketSize; // maximum packet size
UCHAR bInterval; // polling interval
} USB_ENDPOINT_DESCRIPTOR, *PUSB_ENDPOINT_DESCRIPTOR;
typedef struct _USB_CONFIGURATION_DESCRIPTOR { // configuration descriptor
UCHAR bLength;
UCHAR bDescriptorType;
USHORT wTotalLength;
UCHAR bNumInterfaces;
UCHAR bConfigurationValue;
UCHAR iConfiguration;
UCHAR bmAttributes;
UCHAR MaxPower;
} USB_CONFIGURATION_DESCRIPTOR, *PUSB_CONFIGURATION_DESCRIPTOR;
typedef struct _USB_INTERFACE_DESCRIPTOR { // interface descriptor
UCHAR bLength;
UCHAR bDescriptorType;
UCHAR bInterfaceNumber;
UCHAR bAlternateSetting;
UCHAR bNumEndpoints;
UCHAR bInterfaceClass;
UCHAR bInterfaceSubClass;
UCHAR bInterfaceProtocol;
UCHAR iInterface;
} USB_INTERFACE_DESCRIPTOR, *PUSB_INTERFACE_DESCRIPTOR;
typedef struct _USB_STRING_DESCRIPTOR { // string descriptor
UCHAR bLength;
UCHAR bDescriptorType;
WCHAR bString[1];
} USB_STRING_DESCRIPTOR, *PUSB_STRING_DESCRIPTOR;
typedef struct _USB_COMMON_DESCRIPTOR { // common descriptor header
UCHAR bLength;
UCHAR bDescriptorType;
} USB_COMMON_DESCRIPTOR, *PUSB_COMMON_DESCRIPTOR;
#pragma pack(pop)
#endif
//______________________________________________________________________________
class CCyIsoPktInfo { // packet info
public:
LONG Status; // packet status
LONG Length; // packet length
};
//______________________________________________________________________________
// {AE18AA60-7F6A-11d4-97DD-00010229B959}
static GUID CYUSBDRV_GUID = {0xae18aa60, 0x7f6a, 0x11d4, 0x97, 0xdd, 0x0, 0x1, 0x2, 0x29, 0xb9, 0x59};
typedef enum {TGT_DEVICE, TGT_INTFC, TGT_ENDPT, TGT_OTHER } CTL_XFER_TGT_TYPE;
typedef enum {REQ_STD, REQ_CLASS, REQ_VENDOR } CTL_XFER_REQ_TYPE;
typedef enum {DIR_TO_DEVICE, DIR_FROM_DEVICE } CTL_XFER_DIR_TYPE;
typedef enum {XMODE_BUFFERED, XMODE_DIRECT } XFER_MODE_TYPE;
const int MAX_ENDPTS = 16;
const int MAX_INTERFACES = 8;
const int USB_STRING_MAXLEN = 256;
////////////////////////////////////////////////////////////////////////////////
//
// The CCyEndPoint ABSTRACT Class
//
////////////////////////////////////////////////////////////////////////////////
class CCyUSBEndPoint
{
protected:
bool WaitForIO(OVERLAPPED *ovLapStatus);
virtual PUCHAR BeginDirectXfer(PUCHAR buf, LONG bufLen, OVERLAPPED *ov); // direct transfer mode
virtual PUCHAR BeginBufferedXfer(PUCHAR buf, LONG bufLen, OVERLAPPED *ov); // buffered transfer mode
public:
CCyUSBEndPoint(void);
CCyUSBEndPoint(CCyUSBEndPoint& ept);
CCyUSBEndPoint(HANDLE h, PUSB_ENDPOINT_DESCRIPTOR pEndPtDescriptor);
HANDLE hDevice;
// The fields of an EndPoint Descriptor
UCHAR DscLen;
UCHAR DscType;
UCHAR Address;
UCHAR Attributes;
USHORT MaxPktSize;
USHORT PktsPerFrame;
UCHAR Interval;
// Other fields
ULONG TimeOut;
ULONG UsbdStatus;
ULONG NtStatus;
DWORD bytesWritten;
DWORD LastError;
bool bIn;
XFER_MODE_TYPE XferMode;
bool XferData(PUCHAR buf, LONG &len, CCyIsoPktInfo* pktInfos = NULL);
bool XferData(PUCHAR buf, LONG &bufLen, CCyIsoPktInfo* pktInfos, bool pktMode);
virtual PUCHAR BeginDataXfer(PUCHAR buf, LONG len, OVERLAPPED *ov) = 0;
virtual bool FinishDataXfer(PUCHAR buf, LONG &len, OVERLAPPED *ov, PUCHAR pXmitBuf, CCyIsoPktInfo* pktInfos = NULL);
bool WaitForXfer(OVERLAPPED *ov, ULONG tOut);
ULONG GetXferSize(void);
void SetXferSize(ULONG xfer);
bool Reset(void);
bool Abort(void);
private:
};
////////////////////////////////////////////////////////////////////////////////
//
// The Control Endpoint Class
//
////////////////////////////////////////////////////////////////////////////////
class CCyControlEndPoint : public CCyUSBEndPoint
{
private:
public:
CCyControlEndPoint(void);
CCyControlEndPoint(CCyControlEndPoint& ept);
CCyControlEndPoint(HANDLE h, PUSB_ENDPOINT_DESCRIPTOR pEndPtDescriptor);
CTL_XFER_TGT_TYPE Target;
CTL_XFER_REQ_TYPE ReqType;
CTL_XFER_DIR_TYPE Direction;
UCHAR ReqCode;
WORD Value;
WORD Index;
bool Read(PUCHAR buf, LONG &len);
bool Write(PUCHAR buf, LONG &len);
PUCHAR BeginDataXfer(PUCHAR buf, LONG len, OVERLAPPED *ov);
};
////////////////////////////////////////////////////////////////////////////////
//
// The Isoc Endpoint Class
//
////////////////////////////////////////////////////////////////////////////////
class CCyIsocEndPoint : public CCyUSBEndPoint
{
protected:
virtual PUCHAR BeginDirectXfer(PUCHAR buf, LONG bufLen, OVERLAPPED *ov);
virtual PUCHAR BeginBufferedXfer(PUCHAR buf, LONG bufLen, OVERLAPPED *ov);
public:
CCyIsocEndPoint(void);
CCyIsocEndPoint(HANDLE h, PUSB_ENDPOINT_DESCRIPTOR pEndPtDescriptor);
PUCHAR BeginDataXfer(PUCHAR buf, LONG len, OVERLAPPED *ov);
CCyIsoPktInfo* CreatePktInfos(LONG bufLen, int &packets);
};
////////////////////////////////////////////////////////////////////////////////
//
// The Bulk Endpoint Class
//
////////////////////////////////////////////////////////////////////////////////
class CCyBulkEndPoint : public CCyUSBEndPoint
{
public:
CCyBulkEndPoint(void);
CCyBulkEndPoint(HANDLE h, PUSB_ENDPOINT_DESCRIPTOR pEndPtDescriptor);
PUCHAR BeginDataXfer(PUCHAR buf, LONG len, OVERLAPPED *ov);
};
////////////////////////////////////////////////////////////////////////////////
//
// The Interrupt Endpoint Class
//
////////////////////////////////////////////////////////////////////////////////
class CCyInterruptEndPoint : public CCyUSBEndPoint
{
public:
CCyInterruptEndPoint(void);
CCyInterruptEndPoint(HANDLE h, PUSB_ENDPOINT_DESCRIPTOR pEndPtDescriptor);
PUCHAR BeginDataXfer(PUCHAR buf, LONG len, OVERLAPPED *ov);
};
////////////////////////////////////////////////////////////////////////////////
//
// The Interface Class
//
////////////////////////////////////////////////////////////////////////////////
class CCyUSBInterface
{
private:
protected:
public:
CCyUSBEndPoint *EndPoints[MAX_ENDPTS]; // Holds pointers to all the interface's endpoints, plus a pointer to the Control endpoint zero
UCHAR bLength;
UCHAR bDescriptorType;
UCHAR bInterfaceNumber;
UCHAR bAlternateSetting;
UCHAR bNumEndpoints; // Not counting the control endpoint
UCHAR bInterfaceClass;
UCHAR bInterfaceSubClass;
UCHAR bInterfaceProtocol;
UCHAR iInterface;
UCHAR bAltSettings;
USHORT wTotalLength; // Needed in case Intfc has additional (non-endpt) descriptors
CCyUSBInterface(HANDLE h, PUSB_INTERFACE_DESCRIPTOR pIntfcDescriptor);
CCyUSBInterface(CCyUSBInterface& ifc); // Copy Constructor
~CCyUSBInterface(void);
};
////////////////////////////////////////////////////////////////////////////////
//
// The Config Class
//
////////////////////////////////////////////////////////////////////////////////
class CCyUSBConfig
{
private:
protected:
public:
CCyUSBInterface *Interfaces[MAX_INTERFACES];
UCHAR bLength;
UCHAR bDescriptorType;
USHORT wTotalLength;
UCHAR bNumInterfaces;
UCHAR bConfigurationValue;
UCHAR iConfiguration;
UCHAR bmAttributes;
UCHAR MaxPower;
UCHAR AltInterfaces;
CCyUSBConfig(void);
CCyUSBConfig(CCyUSBConfig& cfg); // Copy Constructor
CCyUSBConfig(HANDLE h, PUSB_CONFIGURATION_DESCRIPTOR pConfigDescr);
~CCyUSBConfig(void);
};
////////////////////////////////////////////////////////////////////////////////
//
// The USB Device Class - This is the main class that contains members of all the
// other classes.
//
// To use the library, create an instance of this class and call its Open method
//
////////////////////////////////////////////////////////////////////////////////
class CCyUSBDevice
{
// The public members are accessible (i.e. corruptible) by the user of the library
// Algorithms of the class don't rely on any public members. Instead, they use the
// private members of the class for their calculations.
public:
CCyUSBDevice(HANDLE hnd = NULL, GUID guid = CYUSBDRV_GUID, BOOL bOpen = true);
~CCyUSBDevice(void);
CCyUSBEndPoint **EndPoints; // Shortcut to USBCfgs[CfgNum]->Interfaces[IntfcIndex]->Endpoints
CCyUSBEndPoint *EndPointOf(UCHAR addr);
CCyControlEndPoint *ControlEndPt;
CCyIsocEndPoint *IsocInEndPt;
CCyIsocEndPoint *IsocOutEndPt;
CCyBulkEndPoint *BulkInEndPt;
CCyBulkEndPoint *BulkOutEndPt;
CCyInterruptEndPoint *InterruptInEndPt;
CCyInterruptEndPoint *InterruptOutEndPt;
USHORT StrLangID;
ULONG UsbdStatus;
ULONG NtStatus;
ULONG DriverVersion;
ULONG USBDIVersion;
char DeviceName[USB_STRING_MAXLEN];
char FriendlyName[USB_STRING_MAXLEN];
wchar_t Manufacturer[USB_STRING_MAXLEN];
wchar_t Product[USB_STRING_MAXLEN];
wchar_t SerialNumber[USB_STRING_MAXLEN];
CHAR DevPath[USB_STRING_MAXLEN];
USHORT BcdUSB;
USHORT VendorID;
USHORT ProductID;
UCHAR USBAddress;
UCHAR DevClass;
UCHAR DevSubClass;
UCHAR DevProtocol;
UCHAR MaxPacketSize;
USHORT BcdDevice;
UCHAR ConfigValue;
UCHAR ConfigAttrib;
UCHAR MaxPower;
UCHAR IntfcClass;
UCHAR IntfcSubClass;
UCHAR IntfcProtocol;
bool bHighSpeed;
DWORD BytesXfered;
UCHAR DeviceCount(void);
UCHAR ConfigCount(void);
UCHAR IntfcCount(void);
UCHAR AltIntfcCount(void);
UCHAR EndPointCount(void);
UCHAR Config(void) { return CfgNum; } // Normally 0
void SetConfig(UCHAR cfg);
UCHAR Interface(void) { return IntfcNum; } // Usually 0
// No SetInterface method since only 1 intfc per device (per Windows)
UCHAR AltIntfc(void);
bool SetAltIntfc(UCHAR alt);
GUID DriverGUID(void) { return DrvGuid; }
HANDLE DeviceHandle(void) { return hDevice; }
void UsbdStatusString(ULONG stat, PCHAR s);
bool CreateHandle(UCHAR dev);
void DestroyHandle();
bool Open(UCHAR dev);
void Close(void);
bool Reset(void);
bool ReConnect(void);
bool Suspend(void);
bool Resume(void);
bool IsOpen(void) { return (hDevice != INVALID_HANDLE_VALUE); }
UCHAR PowerState(void);
void GetDeviceDescriptor(PUSB_DEVICE_DESCRIPTOR descr);
void GetConfigDescriptor(PUSB_CONFIGURATION_DESCRIPTOR descr);
void GetIntfcDescriptor(PUSB_INTERFACE_DESCRIPTOR descr);
CCyUSBConfig GetUSBConfig(int index);
private:
USB_DEVICE_DESCRIPTOR USBDeviceDescriptor;
PUSB_CONFIGURATION_DESCRIPTOR USBConfigDescriptors[2];
CCyUSBConfig *USBCfgs[2];
HANDLE hWnd;
HANDLE hDevice;
HANDLE hDevNotification;
HANDLE hHndNotification;
GUID DrvGuid;
UCHAR Devices;
UCHAR Interfaces;
UCHAR AltInterfaces;
UCHAR Configs;
UCHAR DevNum;
UCHAR CfgNum;
UCHAR IntfcNum; // The current selected interface's bInterfaceNumber
UCHAR IntfcIndex; // The entry in the Config's interfaces table matching IntfcNum and AltSetting
void GetDevDescriptor(void);
void GetCfgDescriptor(int descIndex);
void GetString(wchar_t *s, UCHAR sIndex);
void SetStringDescrLanguage(void);
void SetAltIntfcParams(UCHAR alt);
bool IoControl(ULONG cmd, PUCHAR buf, ULONG len);
void SetEndPointPtrs(void);
void GetDeviceName(void);
void GetFriendlyName(void);
void GetDriverVer(void);
void GetUSBDIVer(void);
void GetSpeed(void);
void GetUSBAddress(void);
//void CloseEndPtHandles(void);
bool RegisterForPnpEvents(HANDLE h);
};
//---------------------------------------------------------------------------
#endif
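
The class comment above says to create a CCyUSBDevice instance and call its Open method. The following is a minimal usage sketch, not part of the header: it assumes the CyAPI library is linked on Windows and that a device bound to CYUSBDRV_GUID is attached; the device index, command bytes, and buffer sizes are illustrative only.

#include <windows.h>
#include "CyAPI.h"

int main()
{
    CCyUSBDevice dev;                            // enumerates devices bound to CYUSBDRV_GUID
    if (dev.DeviceCount() == 0 || !dev.Open(0))  // open the first attached device
        return 1;
    if (dev.BulkOutEndPt && dev.BulkInEndPt)
    {
        UCHAR cmd[2] = { 0x01, 0x00 };           // illustrative command bytes
        LONG len = sizeof(cmd);
        dev.BulkOutEndPt->TimeOut = 1000;        // 1 s timeout
        dev.BulkOutEndPt->XferData(cmd, len);    // synchronous bulk OUT transfer
        UCHAR buf[512];
        len = sizeof(buf);
        dev.BulkInEndPt->TimeOut = 1000;
        dev.BulkInEndPt->XferData(buf, len);     // bulk IN; len returns the bytes actually read
    }
    dev.Close();
    return 0;
}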


@ -0,0 +1,34 @@
#pragma once
#include <chrono>
class StopWatch
{
public:
    StopWatch() {
        _start = std::chrono::steady_clock::now();
    }
    void reset() {
        _start = std::chrono::steady_clock::now();
    }
    double elapsed_s() {
        return std::chrono::duration<double>(std::chrono::steady_clock::now() - _start).count();
    }
    double elapsed_ms() {
        return std::chrono::duration<double, std::milli>(std::chrono::steady_clock::now() - _start).count();
    }
    double elapsed_us() {
        return std::chrono::duration<double, std::micro>(std::chrono::steady_clock::now() - _start).count();
    }
    double elapsed_ns() {
        return std::chrono::duration<double, std::nano>(std::chrono::steady_clock::now() - _start).count();
    }
private:
    std::chrono::steady_clock::time_point _start;
};
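
A short usage sketch for the timer class above (the header name is assumed; any C++11 compiler suffices):

#include <iostream>
#include "StopWatch.h"   // assumed header name for the class above

int main()
{
    StopWatch sw;                                  // timing starts at construction
    volatile double x = 0;
    for (int i = 0; i < 1000000; ++i) x += i;      // some work to measure
    std::cout << "loop took " << sw.elapsed_ms() << " ms\n";
    sw.reset();                                    // restart for the next measurement
    return 0;
}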


@ -0,0 +1,33 @@
///////////////////////////////////////////////////////////////////////
// File: apitypes.h
// Description: Types used in both the API and internally
// Author: Ray Smith
// Created: Wed Mar 03 09:22:53 PST 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_API_APITYPES_H__
#define TESSERACT_API_APITYPES_H__
#include "publictypes.h"
// The types used by the API and Page/ResultIterator can be found in:
// ccstruct/publictypes.h
// ccmain/resultiterator.h
// ccmain/pageiterator.h
// API interfaces and API users should be sure to include this file, rather
// than the lower-level one, and lower-level code should be sure to include
// only the lower-level file.
#endif // TESSERACT_API_APITYPES_H__

File diff suppressed because it is too large


@ -0,0 +1,922 @@
///////////////////////////////////////////////////////////////////////
// File: baseapi.h
// Description: Simple API for calling tesseract.
// Author: Ray Smith
// Created: Fri Oct 06 15:35:01 PDT 2006
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_API_BASEAPI_H__
#define TESSERACT_API_BASEAPI_H__
#define TESSERACT_VERSION_STR "3.05.02"
#define TESSERACT_VERSION 0x030502
#define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \
(patch))
#include <stdio.h>
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include "platform.h"
#include "apitypes.h"
#include "thresholder.h"
#include "unichar.h"
#include "tesscallback.h"
#include "publictypes.h"
#include "pageiterator.h"
#include "resultiterator.h"
template <typename T> class GenericVector;
class PAGE_RES;
class PAGE_RES_IT;
class ParagraphModel;
struct BlamerBundle;
class BLOCK_LIST;
class DENORM;
class MATRIX;
class ROW;
class STRING;
class WERD;
struct Pix;
struct Box;
struct Pixa;
struct Boxa;
class ETEXT_DESC;
struct OSResults;
class TBOX;
class UNICHARSET;
class WERD_CHOICE_LIST;
struct INT_FEATURE_STRUCT;
typedef INT_FEATURE_STRUCT *INT_FEATURE;
struct TBLOB;
namespace tesseract {
#ifndef NO_CUBE_BUILD
class CubeRecoContext;
#endif // NO_CUBE_BUILD
class Dawg;
class Dict;
class EquationDetect;
class PageIterator;
class LTRResultIterator;
class ResultIterator;
class MutableIterator;
class TessResultRenderer;
class Tesseract;
class Trie;
class Wordrec;
typedef int (Dict::*DictFunc)(void* void_dawg_args,
UNICHAR_ID unichar_id, bool word_end) const;
typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
const char* context,
int context_bytes,
const char* character,
int character_bytes);
typedef float (Dict::*ParamsModelClassifyFunc)(
const char *lang, void *path);
typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
const WERD_CHOICE_LIST &best_choices,
const UNICHARSET &unicharset,
BlamerBundle *blamer_bundle);
typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *>
TruthCallback;
/**
* Base class for all tesseract APIs.
* Specific classes can add ability to work on different inputs or produce
* different outputs.
* This class is mostly an interface layer on top of the Tesseract instance
* class to hide the data types so that users of this class don't have to
* include any other Tesseract headers.
*/
class TESS_API TessBaseAPI {
public:
TessBaseAPI();
virtual ~TessBaseAPI();
/**
* Returns the version identifier as a static string. Do not delete.
*/
static const char* Version();
/**
* If compiled with OpenCL AND an available OpenCL
* device is deemed faster than serial code, then
* "device" is populated with the cl_device_id
* and returns sizeof(cl_device_id)
* otherwise *device=NULL and returns 0.
*/
static size_t getOpenCLDevice(void **device);
/**
* Writes the thresholded image to stderr as a PBM file on receipt of a
* SIGSEGV, SIGFPE, or SIGBUS signal. (Linux/Unix only).
*/
static void CatchSignals();
/**
* Set the name of the input file. Needed for training and
* reading a UNLV zone file, and for searchable PDF output.
*/
void SetInputName(const char* name);
/**
* These functions are required for searchable PDF output.
* We need our hands on the input file so that we can include
* it in the PDF without transcoding. If that is not possible,
* we need the original image. Finally, resolution metadata
* is stored in the PDF so we need that as well.
*/
const char* GetInputName();
// Takes ownership of the input pix.
void SetInputImage(Pix *pix);
Pix* GetInputImage();
int GetSourceYResolution();
const char* GetDatapath();
/** Set the name of the bonus output files. Needed only for debugging. */
void SetOutputName(const char* name);
/**
* Set the value of an internal "parameter."
* Supply the name of the parameter and the value as a string, just as
* you would in a config file.
* Returns false if the name lookup failed.
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
* SetVariable may be used before Init, but settings will revert to
* defaults on End().
*
* Note: Must be called after Init(). Only works for non-init variables
* (init variables should be passed to Init()).
*/
bool SetVariable(const char* name, const char* value);
bool SetDebugVariable(const char* name, const char* value);
/**
* Returns true if the parameter was found among Tesseract parameters.
* Fills in value with the value of the parameter.
*/
bool GetIntVariable(const char *name, int *value) const;
bool GetBoolVariable(const char *name, bool *value) const;
bool GetDoubleVariable(const char *name, double *value) const;
/**
* Returns the pointer to the string that represents the value of the
* parameter if it was found among Tesseract parameters.
*/
const char *GetStringVariable(const char *name) const;
/**
* Print Tesseract parameters to the given file.
*/
void PrintVariables(FILE *fp) const;
/**
* Get value of named variable as a string, if it exists.
*/
bool GetVariableAsString(const char *name, STRING *val);
/**
* Instances are now mostly thread-safe and totally independent,
* but some global parameters remain. Basically it is safe to use multiple
* TessBaseAPIs in different threads in parallel, UNLESS:
* you use SetVariable on some of the Params in classify and textord.
* If you do, then the effect will be to change it for all your instances.
*
* Start tesseract. Returns zero on success and -1 on failure.
* NOTE that the only members that may be called before Init are those
* listed above here in the class definition.
*
* The datapath must be the name of the parent directory of tessdata and
* must end in / . Any name after the last / will be stripped.
* The language is (usually) an ISO 639-3 string; if NULL, it defaults to eng.
* It is entirely safe (and eventually will be efficient too) to call
* Init multiple times on the same instance to change language, or just
* to reset the classifier.
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
* English. Languages may specify internally that they want to be loaded
* with one or more other languages, so the ~ sign is available to override
* that. Eg if hin were set to load eng by default, then hin+~eng would force
* loading only hin. The number of loaded languages is limited only by
* memory, with the caveat that loading additional languages will impact
* both speed and accuracy, as there is more work to do to decide on the
* applicable language, and there is more chance of hallucinating incorrect
* words.
* WARNING: On changing languages, all Tesseract parameters are reset
* back to their default values. (Which may vary between languages.)
* If you have a rare need to set a Variable that controls
* initialization for a second call to Init you should explicitly
* call End() and then use SetVariable before Init. This is only a very
* rare use case, since there are very few uses that require any parameters
* to be set before Init.
*
* If set_only_non_debug_params is true, only params that do not contain
* "debug" in the name will be set.
*/
int Init(const char* datapath, const char* language, OcrEngineMode mode,
char **configs, int configs_size,
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
bool set_only_non_debug_params);
int Init(const char* datapath, const char* language, OcrEngineMode oem) {
return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
}
int Init(const char* datapath, const char* language) {
return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
}
/**
* Returns the languages string used in the last valid initialization.
* If the last initialization specified "deu+hin" then that will be
* returned. If hin loaded eng automatically as well, then that will
* not be included in this list. To find the languages actually
* loaded use GetLoadedLanguagesAsVector.
* The returned string should NOT be deleted.
*/
const char* GetInitLanguagesAsString() const;
/**
* Returns the loaded languages in the vector of STRINGs.
* Includes all languages loaded by the last Init, including those loaded
* as dependencies of other loaded languages.
*/
void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
/**
* Returns the available languages in the vector of STRINGs.
*/
void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
/**
* Init only the lang model component of Tesseract. The only functions
* that work after this init are SetVariable and IsValidWord.
* WARNING: temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int InitLangMod(const char* datapath, const char* language);
/**
* Init only for page layout analysis. Use only for calls to SetImage and
* AnalysePage. Calls that attempt recognition will generate an error.
*/
void InitForAnalysePage();
/**
* Read a "config" file containing a set of param, value pairs.
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
* and also accepts a relative or absolute path name.
* Note: only non-init params will be set (init params are set by Init()).
*/
void ReadConfigFile(const char* filename);
/** Same as above, but only set debug params from the given config file. */
void ReadDebugConfigFile(const char* filename);
/**
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
* The mode is stored as an IntParam so it can also be modified by
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
*/
void SetPageSegMode(PageSegMode mode);
/** Return the current page segmentation mode. */
PageSegMode GetPageSegMode() const;
/**
* Recognize a rectangle from an image and return the result as a string.
* May be called many times for a single Init.
* Currently has no error checking.
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
* Palette color images will not work properly and must be converted to
* 24 bit.
* Binary images of 1 bit per pixel may also be given but they must be
* byte packed with the MSB of the first byte being the first pixel, and a
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*
* Note that TesseractRect is the simplified convenience interface.
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
* and one or more of the Get*Text functions below.
*/
char* TesseractRect(const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
/**
* Call between pages or documents etc to free up memory and forget
* adaptive data.
*/
void ClearAdaptiveClassifier();
/**
* @defgroup AdvancedAPI Advanced API
* The following methods break TesseractRect into pieces, so you can
* get hold of the thresholded image, get the text in different formats,
* get bounding boxes, confidences etc.
*/
/* @{ */
/**
* Provide an image for Tesseract to recognize. Format is as
* TesseractRect above. Copies the image buffer and converts to Pix.
* SetImage clears all recognition results, and sets the rectangle to the
* full image, so it may be followed immediately by a GetUTF8Text, and it
* will automatically perform recognition.
*/
void SetImage(const unsigned char* imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
/**
* Provide an image for Tesseract to recognize. As with SetImage above,
* Tesseract takes its own copy of the image, so it need not persist until
* after Recognize.
* Pix vs raw, which to use?
* Use Pix where possible. Tesseract uses Pix as its internal representation
* and it is therefore more efficient to provide a Pix directly.
*/
void SetImage(Pix* pix);
/**
* Set the resolution of the source image in pixels per inch so font size
* information can be calculated in results. Call this after SetImage().
*/
void SetSourceResolution(int ppi);
/**
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
* Each SetRectangle clears the recognition results so multiple rectangles
* can be recognized with the same image.
*/
void SetRectangle(int left, int top, int width, int height);
/**
* In extreme cases only, usually with a subclass of Thresholder, it
* is possible to provide a different Thresholder. The Thresholder may
* be preloaded with an image, settings etc, or they may be set after.
* Note that Tesseract takes ownership of the Thresholder and will
* delete it when it is replaced or the API is destroyed.
*/
void SetThresholder(ImageThresholder* thresholder) {
delete thresholder_;
thresholder_ = thresholder;
ClearResults();
}
/**
* Get a copy of the internal thresholded image from Tesseract.
* Caller takes ownership of the Pix and must pixDestroy it.
* May be called any time after SetImage, or after TesseractRect.
*/
Pix* GetThresholdedImage();
/**
* Get the result of page layout analysis as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa* GetRegions(Pixa** pixa);
/**
* Get the textlines as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If raw_image is true, then extract from the original image instead of the
* thresholded image and pad by raw_padding pixels.
* If blockids is not NULL, the block-id of each line is also returned as an
* array of one element per line. delete [] after use.
* If paraids is not NULL, the paragraph-id of each line within its block is
* also returned as an array of one element per line. delete [] after use.
*/
Boxa* GetTextlines(const bool raw_image, const int raw_padding,
Pixa** pixa, int** blockids, int** paraids);
/*
Helper method to extract from the thresholded image. (most common usage)
*/
Boxa* GetTextlines(Pixa** pixa, int** blockids) {
return GetTextlines(false, 0, pixa, blockids, NULL);
}
/**
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
* pair, in reading order. Enables downstream handling of non-rectangular
* regions.
* Can be called before or after Recognize.
* If blockids is not NULL, the block-id of each line is also returned as an
* array of one element per line. delete [] after use.
*/
Boxa* GetStrips(Pixa** pixa, int** blockids);
/**
* Get the words as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa* GetWords(Pixa** pixa);
/**
* Gets the individual connected (text) components (created
* after the page segmentation step, but before recognition)
* as a leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* Note: the caller is responsible for calling boxaDestroy()
* on the returned Boxa array and pixaDestroy() on cc array.
*/
Boxa* GetConnectedComponents(Pixa** cc);
/**
* Get the given level kind of components (block, textline, word etc.) as a
* leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If blockids is not NULL, the block-id of each component is also returned
* as an array of one element per component. delete [] after use.
* If paraids is not NULL, the paragraph-id of each component within its block
* is also returned as an array of one element per component. delete [] after
* use.
* If raw_image is true, then portions of the original image are extracted
* instead of the thresholded image and padded with raw_padding.
* If text_only is true, then only text components are returned.
*/
Boxa* GetComponentImages(const PageIteratorLevel level,
const bool text_only, const bool raw_image,
const int raw_padding,
Pixa** pixa, int** blockids, int** paraids);
// Helper function to get binary images with no padding (most common usage).
Boxa* GetComponentImages(const PageIteratorLevel level,
const bool text_only,
Pixa** pixa, int** blockids) {
return GetComponentImages(level, text_only, false, 0, pixa, blockids, NULL);
}
/**
* Returns the scale factor of the thresholded image that would be returned by
* GetThresholdedImage() and the various GetX() methods that call
* GetComponentImages().
* Returns 0 if no thresholder has been set.
*/
int GetThresholdedImageScaleFactor() const;
/**
* Dump the internal binary image to a PGM file.
* @deprecated Use GetThresholdedImage and write the image using pixWrite
* instead if possible.
*/
void DumpPGM(const char* filename);
/**
* Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just
* the page layout results. Returns an iterator to the results.
* If merge_similar_words is true, words are combined where suitable for use
* with a line recognizer. Use if you want to use AnalyseLayout to find the
* textlines, and then want to process textline fragments with an external
* line recognizer.
* Returns NULL on error or an empty page.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator* AnalyseLayout();
PageIterator* AnalyseLayout(bool merge_similar_words);
int AnalyseLayout1();
/**
* Recognize the image from SetAndThresholdImage, generating Tesseract
* internal structures. Returns 0 on success.
* Optional. The Get*Text functions below will call Recognize if needed.
* After Recognize, the output is kept internally until the next SetImage.
*/
int Recognize(ETEXT_DESC* monitor);
/**
* Methods to retrieve information after SetAndThresholdImage(),
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
*/
/** Variant on Recognize used for testing chopper. */
int RecognizeForChopTest(ETEXT_DESC* monitor);
/**
* Turns images into symbolic text.
*
* filename can point to a single image, a multi-page TIFF,
* or a plain text list of image filenames.
*
* retry_config is useful for debugging. If not NULL, you can fall
* back to an alternate configuration if a page fails for some
* reason.
*
* timeout_millisec terminates processing if any single page
* takes too long. Set to 0 for unlimited time.
*
* renderer is responsible for creating the output. For example,
* use the TessTextRenderer if you want plaintext output, or
* the TessPDFRenderer to produce searchable PDF.
*
* If tessedit_page_number is non-negative, will only process that
* single page. Works for multi-page tiff file, or filelist.
*
* Returns true if successful, false on error.
*/
bool ProcessPages(const char* filename, const char* retry_config,
int timeout_millisec, TessResultRenderer* renderer);
// Does the real work of ProcessPages.
bool ProcessPagesInternal(const char* filename, const char* retry_config,
int timeout_millisec, TessResultRenderer* renderer);
/**
* Turn a single image into symbolic text.
*
* The pix is the image processed. filename and page_index are
* metadata used by side-effect processes, such as reading a box
* file or formatting as hOCR.
*
* See ProcessPages for descriptions of other parameters.
*/
bool ProcessPage(Pix* pix, int page_index, const char* filename,
const char* retry_config, int timeout_millisec,
TessResultRenderer* renderer,
const char* jpgdata, int len);
/**
* Get a reading-order iterator to the results of LayoutAnalysis and/or
* Recognize. The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
ResultIterator* GetIterator();
/**
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
MutableIterator* GetMutableIterator();
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*/
char* GetUTF8Text();
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* monitor can be used to
* cancel the recognition
* receive progress callbacks
*/
char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
*/
char* GetHOCRText(int page_number);
/**
* Make a TSV-formatted string from the internal data structures.
* page_number is 0-based but will appear in the output as 1-based.
*/
char* GetTSVText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a box file used in training. Returned string must be freed with
* the delete [] operator.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
*/
char* GetBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes
* and must be freed with the delete [] operator.
*/
char* GetUNLVText();
/**
* Detect the orientation of the input image and apparent script (alphabet).
* orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270)
* orient_conf is the confidence (15.0 is reasonably confident)
* script_name is an ASCII string, the name of the script, e.g. "Latin"
* script_conf is confidence level in the script
* Returns true on success and writes values to each parameter as an output
*/
bool DetectOrientationScript(int* orient_deg, float* orient_conf, const char** script_name, float* script_conf);
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char* GetOsdText(int page_number);
/** Returns the (average) confidence value between 0 and 100. */
int MeanTextConf();
/**
* Returns all word confidences (between 0 and 100) in an array, terminated
* by -1. The calling function must delete [] after use.
* The number of confidences should correspond to the number of space-
* delimited words in GetUTF8Text.
*/
int* AllWordConfidences();
/**
* Applies the given word to the adaptive classifier if possible.
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
* tell the boundaries of the graphemes.
* Assumes that SetImage/SetRectangle have been used to set the image
* to the given word. The mode arg should be PSM_SINGLE_WORD or
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
* The currently set PageSegMode is preserved.
* Returns false if adaptation was not possible for some reason.
*/
bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
/**
* Free up recognition results and any stored image data, without actually
* freeing any recognition data that would be time-consuming to reload.
* Afterwards, you must call SetImage or TesseractRect before doing
* any Recognize or Get* operation.
*/
void Clear();
/**
* Close down tesseract and free up all memory. End() is equivalent to
* destructing and reconstructing your TessBaseAPI.
* Once End() has been used, none of the other API functions may be used
* other than Init and anything declared above it in the class definition.
*/
void End();
/**
* Clear any library-level memory caches.
* There are a variety of expensive-to-load constant data structures (mostly
* language dictionaries) that are cached globally -- surviving the Init()
* and End() of individual TessBaseAPI's. This function allows the clearing
* of these caches.
**/
static void ClearPersistentCache();
/**
* Check whether a word is valid according to Tesseract's language model
* @return 0 if the word is invalid, non-zero if valid.
* @warning temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int IsValidWord(const char *word);
// Returns true if utf8_character is defined in the UniCharset.
bool IsValidCharacter(const char *utf8_character);
bool GetTextDirection(int* out_offset, float* out_slope);
/** Sets Dict::letter_is_okay_ function to point to the given function. */
void SetDictFunc(DictFunc f);
/** Sets Dict::probability_in_context_ function to point to the given
* function.
*/
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
void SetFillLatticeFunc(FillLatticeFunc f);
/**
* Estimates the Orientation And Script of the image.
* @return true if the image was processed successfully.
*/
bool DetectOS(OSResults*);
/** This method returns the features associated with the input image. */
void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
int* num_features, int* feature_outline_index);
/**
* This method returns the row to which a box of specified dimensions would
* belong. If no good match is found, it returns NULL.
*/
static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
int right, int bottom);
/**
* Method to run adaptive classifier on a blob.
* It returns at max num_max_matches results.
*/
void RunAdaptiveClassifier(TBLOB* blob,
int num_max_matches,
int* unichar_ids,
float* ratings,
int* num_matches_returned);
/** This method returns the string form of the specified unichar. */
const char* GetUnichar(int unichar_id);
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
const Dawg *GetDawg(int i) const;
/** Return the number of dawgs loaded into tesseract_ object. */
int NumDawgs() const;
/** Returns a ROW object created from the input row specification. */
static ROW *MakeTessOCRRow(float baseline, float xheight,
float descender, float ascender);
/** Returns a TBLOB corresponding to the entire input image. */
static TBLOB *MakeTBLOB(Pix *pix);
/**
* This method baseline normalizes a TBLOB in-place. The input row is used
* for normalization. The denorm is an optional parameter in which the
* normalization-antidote is returned.
*/
static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
Tesseract* tesseract() const { return tesseract_; }
OcrEngineMode oem() const { return last_oem_requested_; }
void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
#ifndef NO_CUBE_BUILD
/** Return a pointer to underlying CubeRecoContext object if present. */
CubeRecoContext *GetCubeRecoContext() const;
#endif // NO_CUBE_BUILD
void set_min_orientation_margin(double margin);
/**
* Return text orientation of each block as determined by an earlier run
* of layout analysis.
*/
void GetBlockTextOrientations(int** block_orientation,
bool** vertical_writing);
/** Find lines from the image making the BLOCK_LIST. */
BLOCK_LIST* FindLinesCreateBlockList();
/**
* Delete a block list.
* This is to keep BLOCK_LIST pointer opaque
* and let go of including the other headers.
*/
static void DeleteBlockList(BLOCK_LIST* block_list);
/* @} */
protected:
/** Common code for setting the image. Returns true if Init has been called. */
TESS_LOCAL bool InternalSetImage();
/**
* Run the thresholder to make the thresholded image. If pix is not NULL,
* the source is thresholded to pix instead of the internal IMAGE.
*/
TESS_LOCAL virtual void Threshold(Pix** pix);
/**
* Find lines from the image making the BLOCK_LIST.
* @return 0 on success.
*/
TESS_LOCAL int FindLines();
/** Delete the pageres and block list ready for a new page. */
void ClearResults();
/**
* Return an LTR Result Iterator -- used only for training, as we really want
* to ignore all BiDi smarts at that point.
* delete once you're done with it.
*/
TESS_LOCAL LTRResultIterator* GetLTRIterator();
/**
* Return the length of the output text string, as UTF8, assuming
* one newline per line and one per block, with a terminator,
* and assuming a single character reject marker for each rejected character.
* Also return the number of recognized blobs in blob_count.
*/
TESS_LOCAL int TextLength(int* blob_count);
/** @defgroup ocropusAddOns ocropus add-ons */
/* @{ */
/**
* Adapt to recognize the current image as the given character.
* The image must be preloaded and be just an image of a single character.
*/
TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
int length,
float baseline,
float xheight,
float descender,
float ascender);
/** Recognize text doing one pass only, using settings for a given pass. */
TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
PAGE_RES* pass1_result);
//// paragraphs.cpp ////////////////////////////////////////////////////
TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
/**
* Extract the OCR results, costs (penalty points for uncertainty),
* and the bounding boxes of the characters.
*/
TESS_LOCAL static int TesseractExtractResult(char** text,
int** lengths,
float** costs,
int** x0,
int** y0,
int** x1,
int** y1,
PAGE_RES* page_res);
TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
/* @} */
protected:
Tesseract* tesseract_; ///< The underlying data object.
Tesseract* osd_tesseract_; ///< For orientation & script detection.
EquationDetect* equ_detect_; ///<The equation detector.
ImageThresholder* thresholder_; ///< Image thresholding module.
GenericVector<ParagraphModel *>* paragraph_models_;
BLOCK_LIST* block_list_; ///< The page layout.
PAGE_RES* page_res_; ///< The page-level data.
STRING* input_file_; ///< Name used by training code.
STRING* output_file_; ///< Name used by debug code.
STRING* datapath_; ///< Current location of tessdata.
STRING* language_; ///< Last initialized language.
OcrEngineMode last_oem_requested_; ///< Last OCR engine mode requested.
bool recognition_done_; ///< page_res_ contains recognition data.
TruthCallback *truth_cb_; /// fxn for setting truth_* in WERD_RES
/**
* @defgroup ThresholderParams Thresholder Parameters
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
*/
/* @{ */
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
int image_width_;
int image_height_;
/* @} */
private:
// A list of image filenames gets special consideration
bool ProcessPagesFileList(FILE *fp,
STRING *buf,
const char* retry_config, int timeout_millisec,
TessResultRenderer* renderer,
int tessedit_page_number);
// TIFF supports multipage so gets special consideration.
bool ProcessPagesMultipageTiff(const unsigned char *data,
size_t size,
const char* filename,
const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer,
int tessedit_page_number);
// There's currently no way to pass a document title from the
// Tesseract command line, and we have multiple places that choose
// to set the title to an empty string. Using a single named
// variable will hopefully reduce confusion if the situation changes
// in the future.
const char *unknown_title_;
}; // class TessBaseAPI.
/** Escape a char string - remove &<>"' with HTML codes. */
STRING HOcrEscape(const char* text);
} // namespace tesseract.
#endif // TESSERACT_API_BASEAPI_H__
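
The comments above describe the basic workflow: Init with a datapath ending in '/', SetImage, then one of the Get*Text calls, freeing the returned string with delete []. Below is a minimal C++ sketch under those assumptions; the tessdata path and image file name are placeholders, and leptonica's pixRead/pixDestroy are used to manage the Pix:

#include <cstdio>
#include "baseapi.h"        // this header
#include "allheaders.h"     // leptonica, for pixRead/pixDestroy

int main()
{
    tesseract::TessBaseAPI api;
    if (api.Init("./tessdata/", "eng") != 0) {     // datapath must end in '/'
        fprintf(stderr, "could not initialize tesseract\n");
        return 1;
    }
    Pix* image = pixRead("page.png");              // placeholder input image
    api.SetImage(image);                           // takes a copy and clears old results
    char* text = api.GetUTF8Text();                // UTF-8; free with delete []
    printf("%s", text);
    delete [] text;
    api.End();                                     // free everything (or rely on the destructor)
    pixDestroy(&image);
    return 0;
}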


@ -0,0 +1,826 @@
///////////////////////////////////////////////////////////////////////
// File: capi.cpp
// Description: C-API TessBaseAPI
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESS_CAPI_INCLUDE_BASEAPI
# define TESS_CAPI_INCLUDE_BASEAPI
#endif
#include "capi.h"
#include "genericvector.h"
#include "strngs.h"
TESS_API const char* TESS_CALL TessVersion()
{
return TessBaseAPI::Version();
}
TESS_API void TESS_CALL TessDeleteText(char* text)
{
delete[] text;
}
TESS_API void TESS_CALL TessDeleteTextArray(char** arr)
{
for (char** pos = arr; *pos != NULL; ++pos)
delete[] * pos;
delete[] arr;
}
TESS_API void TESS_CALL TessDeleteIntArray(int* arr)
{
delete[] arr;
}
TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list)
{
TessBaseAPI::DeleteBlockList(block_list);
}
TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase)
{
return new TessTextRenderer(outputbase);
}
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase)
{
return new TessHOcrRenderer(outputbase);
}
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info)
{
return new TessHOcrRenderer(outputbase, font_info);
}
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir)
{
return new TessPDFRenderer(outputbase, datadir, false);
}
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreateTextonly(const char* outputbase, const char* datadir,
BOOL textonly)
{
return new TessPDFRenderer(outputbase, datadir, textonly);
}
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase)
{
return new TessUnlvRenderer(outputbase);
}
TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase)
{
return new TessBoxTextRenderer(outputbase);
}
TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer)
{
delete renderer;
}
TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, TessResultRenderer* next)
{
renderer->insert(next);
}
TESS_API TessResultRenderer* TESS_CALL TessResultRendererNext(TessResultRenderer* renderer)
{
return renderer->next();
}
TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(TessResultRenderer* renderer, const char* title)
{
return renderer->BeginDocument(title);
}
TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, TessBaseAPI* api)
{
return renderer->AddImage(api, nullptr, 0);
}
TESS_API BOOL TESS_CALL TessResultRendererEndDocument(TessResultRenderer* renderer)
{
return renderer->EndDocument();
}
TESS_API const char* TESS_CALL TessResultRendererExtention(TessResultRenderer* renderer)
{
return renderer->file_extension();
}
TESS_API const char* TESS_CALL TessResultRendererTitle(TessResultRenderer* renderer)
{
return renderer->title();
}
TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer)
{
return renderer->imagenum();
}
TESS_API TessBaseAPI* TESS_CALL TessBaseAPICreate()
{
return new TessBaseAPI;
}
TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle)
{
delete handle;
}
TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void **device)
{
return handle->getOpenCLDevice(device);
}
TESS_API void TESS_CALL TessBaseAPISetInputName(TessBaseAPI* handle, const char* name)
{
handle->SetInputName(name);
}
TESS_API const char* TESS_CALL TessBaseAPIGetInputName(TessBaseAPI* handle)
{
return handle->GetInputName();
}
TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle, Pix* pix)
{
handle->SetInputImage(pix);
}
TESS_API Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle)
{
return handle->GetInputImage();
}
TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle)
{
return handle->GetSourceYResolution();
}
TESS_API const char* TESS_CALL TessBaseAPIGetDatapath(TessBaseAPI* handle)
{
return handle->GetDatapath();
}
TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, const char* name)
{
handle->SetOutputName(name);
}
TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name, const char* value)
{
return handle->SetVariable(name, value) ? TRUE : FALSE;
}
TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name, const char* value)
{
return handle->SetDebugVariable(name, value) ? TRUE : FALSE;
}
TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, const char* name, int* value)
{
return handle->GetIntVariable(name, value) ? TRUE : FALSE;
}
TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, const char* name, BOOL* value)
{
bool boolValue;
if (handle->GetBoolVariable(name, &boolValue))
{
*value = boolValue ? TRUE : FALSE;
return TRUE;
}
else
{
return FALSE;
}
}
TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value)
{
return handle->GetDoubleVariable(name, value) ? TRUE : FALSE;
}
TESS_API const char* TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name)
{
return handle->GetStringVariable(name);
}
TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, FILE* fp)
{
handle->PrintVariables(fp);
}
TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename)
{
FILE* fp = fopen(filename, "w");
if (fp != NULL)
{
handle->PrintVariables(fp);
fclose(fp);
return TRUE;
}
return FALSE;
}
TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, const char* name, STRING* val)
{
return handle->GetVariableAsString(name, val) ? TRUE : FALSE;
}
TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language,
TessOcrEngineMode mode, char** configs, int configs_size,
char** vars_vec, char** vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params)
{
GenericVector<STRING> varNames;
GenericVector<STRING> varValues;
if (vars_vec != NULL && vars_values != NULL) {
for (size_t i = 0; i < vars_vec_size; i++) {
varNames.push_back(STRING(vars_vec[i]));
varValues.push_back(STRING(vars_values[i]));
}
}
return handle->Init(datapath, language, mode, configs, configs_size, &varNames, &varValues, set_only_non_debug_params);
}
TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem,
char** configs, int configs_size)
{
return handle->Init(datapath, language, oem, configs, configs_size, NULL, NULL, false);
}
TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem)
{
return handle->Init(datapath, language, oem);
}
TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language)
{
return handle->Init(datapath, language);
}
TESS_API const char* TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle)
{
return handle->GetInitLanguagesAsString();
}
TESS_API char** TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle)
{
GenericVector<STRING> languages;
handle->GetLoadedLanguagesAsVector(&languages);
char** arr = new char*[languages.size() + 1];
for (int index = 0; index < languages.size(); ++index)
arr[index] = languages[index].strdup();
arr[languages.size()] = NULL;
return arr;
}
TESS_API char** TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle)
{
GenericVector<STRING> languages;
handle->GetAvailableLanguagesAsVector(&languages);
char** arr = new char*[languages.size() + 1];
for (int index = 0; index < languages.size(); ++index)
arr[index] = languages[index].strdup();
arr[languages.size()] = NULL;
return arr;
}
TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language)
{
return handle->InitLangMod(datapath, language);
}
TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle)
{
handle->InitForAnalysePage();
}
TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, const char* filename)
{
handle->ReadConfigFile(filename);
}
TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, const char* filename)
{
handle->ReadDebugConfigFile(filename);
}
TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode)
{
handle->SetPageSegMode(mode);
}
TESS_API TessPageSegMode TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle)
{
return handle->GetPageSegMode();
}
TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height)
{
return handle->TesseractRect(imagedata, bytes_per_pixel, bytes_per_line, left, top, width, height);
}
TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle)
{
handle->ClearAdaptiveClassifier();
}
TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line)
{
handle->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line);
}
TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, struct Pix* pix)
{
return handle->SetImage(pix);
}
TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, int ppi)
{
handle->SetSourceResolution(ppi);
}
TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, int top, int width, int height)
{
handle->SetRectangle(left, top, width, height);
}
TESS_API void TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImageThresholder* thresholder)
{
handle->SetThresholder(thresholder);
}
TESS_API struct Pix* TESS_CALL TessBaseAPIGetThresholdedImage(TessBaseAPI* handle)
{
return handle->GetThresholdedImage();
}
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, struct Pixa** pixa)
{
return handle->GetRegions(pixa);
}
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, struct Pixa** pixa, int** blockids)
{
return handle->GetTextlines(pixa, blockids);
}
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetTextlines1(TessBaseAPI* handle, const BOOL raw_image, const int raw_padding,
struct Pixa** pixa, int** blockids, int** paraids)
{
return handle->GetTextlines(raw_image, raw_padding, pixa, blockids, paraids);
}
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, struct Pixa** pixa, int** blockids)
{
return handle->GetStrips(pixa, blockids);
}
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, struct Pixa** pixa)
{
return handle->GetWords(pixa);
}
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc)
{
return handle->GetConnectedComponents(cc);
}
TESS_API struct Boxa* TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only, struct Pixa** pixa, int** blockids)
{
return handle->GetComponentImages(level, text_only != FALSE, pixa, blockids);
}
TESS_API struct Boxa*
TESS_CALL TessBaseAPIGetComponentImages1(TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only,
const BOOL raw_image, const int raw_padding,
struct Pixa** pixa, int** blockids, int** paraids)
{
return handle->GetComponentImages(level, text_only != FALSE, raw_image, raw_padding, pixa, blockids, paraids);
}
TESS_API int TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle)
{
return handle->GetThresholdedImageScaleFactor();
}
TESS_API void TESS_CALL TessBaseAPIDumpPGM(TessBaseAPI* handle, const char* filename)
{
handle->DumpPGM(filename);
}
TESS_API TessPageIterator* TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle)
{
return handle->AnalyseLayout();
}
TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor)
{
return handle->Recognize(monitor);
}
TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ETEXT_DESC* monitor)
{
return handle->RecognizeForChopTest(monitor);
}
TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename, const char* retry_config,
int timeout_millisec, TessResultRenderer* renderer)
{
if (handle->ProcessPages(filename, retry_config, timeout_millisec, renderer))
return TRUE;
else
return FALSE;
}
TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix, int page_index, const char* filename,
const char* retry_config, int timeout_millisec, TessResultRenderer* renderer)
{
if (handle->ProcessPage(pix, page_index, filename, retry_config, timeout_millisec, renderer, nullptr, 0))
return TRUE;
else
return FALSE;
}
TESS_API TessResultIterator* TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle)
{
return handle->GetIterator();
}
TESS_API TessMutableIterator* TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle)
{
return handle->GetMutableIterator();
}
TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle)
{
return handle->GetUTF8Text();
}
TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number)
{
return handle->GetHOCRText(NULL, page_number);
}
TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number)
{
return handle->GetBoxText(page_number);
}
TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle)
{
return handle->GetUNLVText();
}
TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle)
{
return handle->MeanTextConf();
}
TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle)
{
return handle->AllWordConfidences();
}
TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, TessPageSegMode mode, const char* wordstr)
{
return handle->AdaptToWordStr(mode, wordstr) ? TRUE : FALSE;
}
TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle)
{
handle->Clear();
}
TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle)
{
handle->End();
}
TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle, const char* word)
{
return handle->IsValidWord(word);
}
TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, int* out_offset, float* out_slope)
{
return handle->GetTextDirection(out_offset, out_slope) ? TRUE : FALSE;
}
TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, TessDictFunc f)
{
handle->SetDictFunc(f);
}
TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle)
{
handle->ClearPersistentCache();
}
TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f)
{
handle->SetProbabilityInContextFunc(f);
}
TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results)
{
return FALSE; // Unsafe ABI, return FALSE always
}
TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
int* orient_deg, float* orient_conf, const char** script_name, float* script_conf)
{
bool success;
success = handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf);
return (BOOL)success;
}
TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
int* num_features, int* FeatureOutlineIndex)
{
handle->GetFeaturesForBlob(blob, int_features, num_features, FeatureOutlineIndex);
}
TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, int bottom)
{
return TessBaseAPI::FindRowForBox(blocks, left, top, right, bottom);
}
TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, int num_max_matches,
int* unichar_ids, float* ratings, int* num_matches_returned)
{
handle->RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings, num_matches_returned);
}
TESS_API const char* TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id)
{
return handle->GetUnichar(unichar_id);
}
TESS_API const TessDawg* TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i)
{
return handle->GetDawg(i);
}
TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle)
{
return handle->NumDawgs();
}
TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, float descender, float ascender)
{
return TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
}
TESS_API TBLOB* TESS_CALL TessMakeTBLOB(struct Pix* pix)
{
return TessBaseAPI::MakeTBLOB(pix);
}
TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode)
{
TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode != FALSE);
}
TESS_API TessOcrEngineMode TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle)
{
return handle->oem();
}
TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb)
{
handle->InitTruthCallback(cb);
}
#ifndef NO_CUBE_BUILD
TESS_API TessCubeRecoContext* TESS_CALL TessBaseAPIGetCubeRecoContext(const TessBaseAPI* handle)
{
return handle->GetCubeRecoContext();
}
#endif // NO_CUBE_BUILD
TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin)
{
handle->set_min_orientation_margin(margin);
}
TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, int** block_orientation, bool** vertical_writing)
{
handle->GetBlockTextOrientations(block_orientation, vertical_writing);
}
TESS_API BLOCK_LIST* TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle)
{
return handle->FindLinesCreateBlockList();
}
TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle)
{
delete handle;
}
TESS_API TessPageIterator* TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle)
{
return new TessPageIterator(*handle);
}
TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle)
{
handle->Begin();
}
TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, TessPageIteratorLevel level)
{
return handle->Next(level) ? TRUE : FALSE;
}
TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator* handle, TessPageIteratorLevel level)
{
return handle->IsAtBeginningOf(level) ? TRUE : FALSE;
}
TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, TessPageIteratorLevel level,
TessPageIteratorLevel element)
{
return handle->IsAtFinalElement(level, element) ? TRUE : FALSE;
}
TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level,
int* left, int* top, int* right, int* bottom)
{
return handle->BoundingBox(level, left, top, right, bottom) ? TRUE : FALSE;
}
TESS_API TessPolyBlockType TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle)
{
return handle->BlockType();
}
TESS_API struct Pix* TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level)
{
return handle->GetBinaryImage(level);
}
TESS_API struct Pix* TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding,
struct Pix* original_image, int* left, int* top)
{
return handle->GetImage(level, padding, original_image, left, top);
}
TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level,
int* x1, int* y1, int* x2, int* y2)
{
return handle->Baseline(level, x1, y1, x2, y2) ? TRUE : FALSE;
}
TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation* orientation,
TessWritingDirection* writing_direction, TessTextlineOrder* textline_order,
float* deskew_angle)
{
handle->Orientation(orientation, writing_direction, textline_order, deskew_angle);
}
TESS_API void TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle, TessParagraphJustification* justification,
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent)
{
bool bool_is_list_item, bool_is_crown;
handle->ParagraphInfo(justification, &bool_is_list_item, &bool_is_crown, first_line_indent);
if (is_list_item)
*is_list_item = bool_is_list_item ? TRUE : FALSE;
if (is_crown)
*is_crown = bool_is_crown ? TRUE : FALSE;
}
TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle)
{
delete handle;
}
TESS_API TessResultIterator* TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle)
{
return new TessResultIterator(*handle);
}
TESS_API TessPageIterator* TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle)
{
return handle;
}
TESS_API const TessPageIterator* TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle)
{
return handle;
}
TESS_API TessChoiceIterator* TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle)
{
return new TessChoiceIterator(*handle);
}
TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level)
{
return handle->Next(level);
}
TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level)
{
return handle->GetUTF8Text(level);
}
TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level)
{
return handle->Confidence(level);
}
TESS_API const char* TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle)
{
return handle->WordRecognitionLanguage();
}
TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif,
BOOL* is_smallcaps, int* pointsize, int* font_id)
{
bool bool_is_bold, bool_is_italic, bool_is_underlined, bool_is_monospace, bool_is_serif, bool_is_smallcaps;
const char* ret = handle->WordFontAttributes(&bool_is_bold, &bool_is_italic, &bool_is_underlined, &bool_is_monospace, &bool_is_serif,
&bool_is_smallcaps, pointsize, font_id);
if (is_bold)
*is_bold = bool_is_bold ? TRUE : FALSE;
if (is_italic)
*is_italic = bool_is_italic ? TRUE : FALSE;
if (is_underlined)
*is_underlined = bool_is_underlined ? TRUE : FALSE;
if (is_monospace)
*is_monospace = bool_is_monospace ? TRUE : FALSE;
if (is_serif)
*is_serif = bool_is_serif ? TRUE : FALSE;
if (is_smallcaps)
*is_smallcaps = bool_is_smallcaps ? TRUE : FALSE;
return ret;
}
TESS_API BOOL TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle)
{
return handle->WordIsFromDictionary() ? TRUE : FALSE;
}
TESS_API BOOL TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle)
{
return handle->WordIsNumeric() ? TRUE : FALSE;
}
TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle)
{
return handle->SymbolIsSuperscript() ? TRUE : FALSE;
}
TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle)
{
return handle->SymbolIsSubscript() ? TRUE : FALSE;
}
TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle)
{
return handle->SymbolIsDropcap() ? TRUE : FALSE;
}
TESS_API void TESS_CALL TessChoiceIteratorDelete(TessChoiceIterator* handle)
{
delete handle;
}
TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle)
{
return handle->Next();
}
TESS_API const char* TESS_CALL TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle)
{
return handle->GetUTF8Text();
}
TESS_API float TESS_CALL TessChoiceIteratorConfidence(const TessChoiceIterator* handle)
{
return handle->Confidence();
}
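/* Illustrative usage sketch (not part of capi.cpp): a minimal end-to-end call
 * sequence through the C wrappers above. Assumes leptonica's pixRead()/
 * pixDestroy(), an installed "eng" traineddata and <stdio.h>; paths are
 * placeholders. */
#if 0
static int ocr_one_image(const char* tessdata_dir, const char* image_path)
{
    TessBaseAPI* api = TessBaseAPICreate();
    if (TessBaseAPIInit3(api, tessdata_dir, "eng") != 0) {    /* 0 == success */
        TessBaseAPIDelete(api);
        return -1;
    }
    struct Pix* pix = pixRead(image_path);                    /* leptonica image loader */
    if (pix == NULL) {
        TessBaseAPIDelete(api);
        return -1;
    }
    TessBaseAPISetImage2(api, pix);
    char* text = TessBaseAPIGetUTF8Text(api);                 /* caller owns the string */
    if (text != NULL) {
        printf("%s", text);
        TessDeleteText(text);                                 /* free with the matching API */
    }
    pixDestroy(&pix);
    TessBaseAPIEnd(api);
    TessBaseAPIDelete(api);
    return 0;
}
#endif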

View File

@ -0,0 +1,409 @@
///////////////////////////////////////////////////////////////////////
// File: capi.h
// Description: C-API TessBaseAPI
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef API_CAPI_H_
#define API_CAPI_H_
#ifdef TESS_CAPI_INCLUDE_BASEAPI
# include "baseapi.h"
# include "pageiterator.h"
# include "resultiterator.h"
# include "renderer.h"
#else
# include "platform.h"
# include <stdio.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
#ifndef TESS_CALL
# if defined(WIN32)
# define TESS_CALL __cdecl
# else
# define TESS_CALL
# endif
#endif
#ifndef BOOL
# define BOOL int
# define TRUE 1
# define FALSE 0
#endif
#ifdef TESS_CAPI_INCLUDE_BASEAPI
typedef tesseract::TessResultRenderer TessResultRenderer;
typedef tesseract::TessTextRenderer TessTextRenderer;
typedef tesseract::TessHOcrRenderer TessHOcrRenderer;
typedef tesseract::TessPDFRenderer TessPDFRenderer;
typedef tesseract::TessUnlvRenderer TessUnlvRenderer;
typedef tesseract::TessBoxTextRenderer TessBoxTextRenderer;
typedef tesseract::TessBaseAPI TessBaseAPI;
typedef tesseract::PageIterator TessPageIterator;
typedef tesseract::ResultIterator TessResultIterator;
typedef tesseract::MutableIterator TessMutableIterator;
typedef tesseract::ChoiceIterator TessChoiceIterator;
typedef tesseract::OcrEngineMode TessOcrEngineMode;
typedef tesseract::PageSegMode TessPageSegMode;
typedef tesseract::ImageThresholder TessImageThresholder;
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
typedef tesseract::DictFunc TessDictFunc;
typedef tesseract::ProbabilityInContextFunc TessProbabilityInContextFunc;
// typedef tesseract::ParamsModelClassifyFunc TessParamsModelClassifyFunc;
typedef tesseract::FillLatticeFunc TessFillLatticeFunc;
typedef tesseract::Dawg TessDawg;
typedef tesseract::TruthCallback TessTruthCallback;
#ifndef NO_CUBE_BUILD
typedef tesseract::CubeRecoContext TessCubeRecoContext;
#endif // NO_CUBE_BUILD
typedef tesseract::Orientation TessOrientation;
typedef tesseract::ParagraphJustification TessParagraphJustification;
typedef tesseract::WritingDirection TessWritingDirection;
typedef tesseract::TextlineOrder TessTextlineOrder;
typedef PolyBlockType TessPolyBlockType;
#else
typedef struct TessResultRenderer TessResultRenderer;
typedef struct TessTextRenderer TessTextRenderer;
typedef struct TessHOcrRenderer TessHOcrRenderer;
typedef struct TessPDFRenderer TessPDFRenderer;
typedef struct TessUnlvRenderer TessUnlvRenderer;
typedef struct TessBoxTextRenderer TessBoxTextRenderer;
typedef struct TessBaseAPI TessBaseAPI;
typedef struct TessPageIterator TessPageIterator;
typedef struct TessResultIterator TessResultIterator;
typedef struct TessMutableIterator TessMutableIterator;
typedef struct TessChoiceIterator TessChoiceIterator;
typedef enum TessOcrEngineMode { OEM_TESSERACT_ONLY, OEM_CUBE_ONLY, OEM_TESSERACT_CUBE_COMBINED, OEM_DEFAULT } TessOcrEngineMode;
typedef enum TessPageSegMode { PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO, PSM_SINGLE_COLUMN, PSM_SINGLE_BLOCK_VERT_TEXT,
PSM_SINGLE_BLOCK, PSM_SINGLE_LINE, PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_SPARSE_TEXT,
PSM_SPARSE_TEXT_OSD, PSM_COUNT } TessPageSegMode;
typedef enum TessPageIteratorLevel { RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL} TessPageIteratorLevel;
typedef enum TessPolyBlockType { PT_UNKNOWN, PT_FLOWING_TEXT, PT_HEADING_TEXT, PT_PULLOUT_TEXT, PT_EQUATION, PT_INLINE_EQUATION,
PT_TABLE, PT_VERTICAL_TEXT, PT_CAPTION_TEXT, PT_FLOWING_IMAGE, PT_HEADING_IMAGE,
PT_PULLOUT_IMAGE, PT_HORZ_LINE, PT_VERT_LINE, PT_NOISE, PT_COUNT } TessPolyBlockType;
typedef enum TessOrientation { ORIENTATION_PAGE_UP, ORIENTATION_PAGE_RIGHT, ORIENTATION_PAGE_DOWN, ORIENTATION_PAGE_LEFT } TessOrientation;
typedef enum TessParagraphJustification { JUSTIFICATION_UNKNOWN, JUSTIFICATION_LEFT, JUSTIFICATION_CENTER, JUSTIFICATION_RIGHT } TessParagraphJustification;
typedef enum TessWritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT, WRITING_DIRECTION_RIGHT_TO_LEFT, WRITING_DIRECTION_TOP_TO_BOTTOM } TessWritingDirection;
typedef enum TessTextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT, TEXTLINE_ORDER_RIGHT_TO_LEFT, TEXTLINE_ORDER_TOP_TO_BOTTOM } TessTextlineOrder;
typedef struct ETEXT_DESC ETEXT_DESC;
#endif
struct Pix;
struct Boxa;
struct Pixa;
/* General free functions */
TESS_API const char*
TESS_CALL TessVersion();
TESS_API void TESS_CALL TessDeleteText(char* text);
TESS_API void TESS_CALL TessDeleteTextArray(char** arr);
TESS_API void TESS_CALL TessDeleteIntArray(int* arr);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list);
#endif
/* Renderer API */
TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info);
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir);
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreateTextonly(const char* outputbase, const char* datadir,
BOOL textonly);
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase);
TESS_API void TESS_CALL TessDeleteResultRenderer(TessResultRenderer* renderer);
TESS_API void TESS_CALL TessResultRendererInsert(TessResultRenderer* renderer, TessResultRenderer* next);
TESS_API TessResultRenderer*
TESS_CALL TessResultRendererNext(TessResultRenderer* renderer);
TESS_API BOOL TESS_CALL TessResultRendererBeginDocument(TessResultRenderer* renderer, const char* title);
TESS_API BOOL TESS_CALL TessResultRendererAddImage(TessResultRenderer* renderer, TessBaseAPI* api);
TESS_API BOOL TESS_CALL TessResultRendererEndDocument(TessResultRenderer* renderer);
TESS_API const char* TESS_CALL TessResultRendererExtention(TessResultRenderer* renderer);
TESS_API const char* TESS_CALL TessResultRendererTitle(TessResultRenderer* renderer);
TESS_API int TESS_CALL TessResultRendererImageNum(TessResultRenderer* renderer);
/* Base API */
TESS_API TessBaseAPI*
TESS_CALL TessBaseAPICreate();
TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle);
TESS_API size_t TESS_CALL TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void **device);
TESS_API void TESS_CALL TessBaseAPISetInputName( TessBaseAPI* handle, const char* name);
TESS_API const char* TESS_CALL TessBaseAPIGetInputName(TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPISetInputImage(TessBaseAPI* handle, struct Pix* pix);
TESS_API struct Pix* TESS_CALL TessBaseAPIGetInputImage(TessBaseAPI* handle);
TESS_API int TESS_CALL TessBaseAPIGetSourceYResolution(TessBaseAPI* handle);
TESS_API const char* TESS_CALL TessBaseAPIGetDatapath(TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, const char* name);
TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name, const char* value);
TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name, const char* value);
TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable( const TessBaseAPI* handle, const char* name, int* value);
TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable( const TessBaseAPI* handle, const char* name, BOOL* value);
TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value);
TESS_API const char*
TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name);
TESS_API void TESS_CALL TessBaseAPIPrintVariables( const TessBaseAPI* handle, FILE* fp);
TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, const char* name, STRING* val);
#endif
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API int TESS_CALL TessBaseAPIInit(TessBaseAPI* handle, const char* datapath, const char* language,
TessOcrEngineMode mode, char** configs, int configs_size,
const STRING* vars_vec, size_t vars_vec_size,
const STRING* vars_values, size_t vars_values_size, BOOL set_only_init_params);
#endif
TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem,
char** configs, int configs_size);
TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem);
TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language);
TESS_API int TESS_CALL TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode mode,
char** configs, int configs_size,
char** vars_vec, char** vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API const char*
TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle);
TESS_API char**
TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle);
TESS_API char**
TESS_CALL TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle);
TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language);
TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, const char* filename);
TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, const char* filename);
TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode);
TESS_API TessPageSegMode
TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle);
TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, struct Pix* pix);
TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, int ppi);
TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, int top, int width, int height);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImageThresholder* thresholder);
#endif
TESS_API struct Pix*
TESS_CALL TessBaseAPIGetThresholdedImage( TessBaseAPI* handle);
TESS_API struct Boxa*
TESS_CALL TessBaseAPIGetRegions( TessBaseAPI* handle, struct Pixa** pixa);
TESS_API struct Boxa*
TESS_CALL TessBaseAPIGetTextlines( TessBaseAPI* handle, struct Pixa** pixa, int** blockids);
TESS_API struct Boxa*
TESS_CALL TessBaseAPIGetTextlines1( TessBaseAPI* handle, const BOOL raw_image, const int raw_padding,
struct Pixa** pixa, int** blockids, int** paraids);
TESS_API struct Boxa*
TESS_CALL TessBaseAPIGetStrips( TessBaseAPI* handle, struct Pixa** pixa, int** blockids);
TESS_API struct Boxa*
TESS_CALL TessBaseAPIGetWords( TessBaseAPI* handle, struct Pixa** pixa);
TESS_API struct Boxa*
TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc);
TESS_API struct Boxa*
TESS_CALL TessBaseAPIGetComponentImages( TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only,
struct Pixa** pixa, int** blockids);
TESS_API struct Boxa*
TESS_CALL TessBaseAPIGetComponentImages1( TessBaseAPI* handle, const TessPageIteratorLevel level, const BOOL text_only,
const BOOL raw_image, const int raw_padding,
struct Pixa** pixa, int** blockids, int** paraids);
TESS_API int TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPIDumpPGM(TessBaseAPI* handle, const char* filename);
TESS_API TessPageIterator*
TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle);
TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor);
TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ETEXT_DESC* monitor);
TESS_API BOOL TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename, const char* retry_config,
int timeout_millisec, TessResultRenderer* renderer);
TESS_API BOOL TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix, int page_index, const char* filename,
const char* retry_config, int timeout_millisec, TessResultRenderer* renderer);
TESS_API TessResultIterator*
TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle);
TESS_API TessMutableIterator*
TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle);
TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle);
TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number);
TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number);
TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle);
TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle);
TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle);
TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, TessPageSegMode mode, const char* wordstr);
TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle);
TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle, const char* word);
TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, int* out_offset, float* out_slope);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, TessDictFunc f);
TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f);
TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, TessFillLatticeFunc f);
// Deprecated, no longer working
TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results);
// Call TessDeleteText(*best_script_name) to free memory allocated by this function
TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
int* orient_deg, float* orient_conf, const char **script_name, float* script_conf);
TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
int* num_features, int* FeatureOutlineIndex);
TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, int bottom);
TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, int num_max_matches,
int* unichar_ids, float* ratings, int* num_matches_returned);
#endif
TESS_API const char*
TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API const TessDawg*
TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i);
TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle);
#endif
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, float descender, float ascender);
TESS_API TBLOB*
TESS_CALL TessMakeTBLOB(Pix* pix);
TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode);
TESS_API TessOcrEngineMode
TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback* cb);
#ifndef NO_CUBE_BUILD
TESS_API TessCubeRecoContext*
TESS_CALL TessBaseAPIGetCubeRecoContext(const TessBaseAPI* handle);
#endif // NO_CUBE_BUILD
#endif
TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, int** block_orientation, BOOL** vertical_writing);
TESS_API BLOCK_LIST*
TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle);
#endif
/* Page iterator */
TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle);
TESS_API TessPageIterator*
TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle);
TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle);
TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, TessPageIteratorLevel level);
TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator* handle, TessPageIteratorLevel level);
TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, TessPageIteratorLevel level,
TessPageIteratorLevel element);
TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level,
int* left, int* top, int* right, int* bottom);
TESS_API TessPolyBlockType
TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle);
TESS_API struct Pix*
TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level);
TESS_API struct Pix*
TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding,
struct Pix* original_image, int* left, int* top);
TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level,
int* x1, int* y1, int* x2, int* y2);
TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation* orientation,
TessWritingDirection* writing_direction, TessTextlineOrder* textline_order,
float* deskew_angle);
TESS_API void TESS_CALL TessPageIteratorParagraphInfo(TessPageIterator* handle, TessParagraphJustification* justification,
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
/* Result iterator */
TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle);
TESS_API TessResultIterator*
TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle);
TESS_API TessPageIterator*
TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle);
TESS_API const TessPageIterator*
TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle);
TESS_API TessChoiceIterator*
TESS_CALL TessResultIteratorGetChoiceIterator(const TessResultIterator* handle);
TESS_API BOOL TESS_CALL TessResultIteratorNext(TessResultIterator* handle, TessPageIteratorLevel level);
TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level);
TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level);
TESS_API const char*
TESS_CALL TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle);
TESS_API const char*
TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif,
BOOL* is_smallcaps, int* pointsize, int* font_id);
TESS_API BOOL TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle);
TESS_API BOOL TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle);
TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle);
TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle);
TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle);
TESS_API void TESS_CALL TessChoiceIteratorDelete(TessChoiceIterator* handle);
TESS_API BOOL TESS_CALL TessChoiceIteratorNext(TessChoiceIterator* handle);
TESS_API const char* TESS_CALL TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle);
TESS_API float TESS_CALL TessChoiceIteratorConfidence(const TessChoiceIterator* handle);
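/* Illustrative usage sketch (not part of this header): chaining renderers and
 * letting TessBaseAPIProcessPages feed every page to the chain. The "out"
 * output base is an assumption. */
#if 0
static void render_text_and_hocr(TessBaseAPI* api, const char* image_file)
{
    TessResultRenderer* renderer = TessTextRendererCreate("out");       /* writes out.txt */
    TessResultRendererInsert(renderer, TessHOcrRendererCreate("out"));  /* appends out.hocr */
    TessBaseAPIProcessPages(api, image_file, NULL, 0, renderer);        /* no retry config, no timeout */
    TessDeleteResultRenderer(renderer);   /* the chained renderer is deleted with it */
}
#endif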
#ifdef __cplusplus
}
#endif
#endif // API_CAPI_H_

View File

@ -0,0 +1,9 @@
#define HAVE_LIBJPEG 1
#define HAVE_LIBTIFF 1
#define HAVE_LIBPNG 1
#define HAVE_LIBZ 1
#define HAVE_LIBGIF 1
#define HAVE_LIBUNGIF 0
#define HAVE_LIBWEBP 1
#define HAVE_LIBJP2K 1
#define LIBJP2K_HEADER <openjpeg.h>

File diff suppressed because it is too large

View File

@ -0,0 +1,283 @@
///////////////////////////////////////////////////////////////////////
// File: renderer.cpp
// Description: Rendering interface to inject into TessBaseAPI
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#include <string.h>
#include "baseapi.h"
#include "genericvector.h"
#include "renderer.h"
namespace tesseract {
/**********************************************************************
* Base Renderer interface implementation
**********************************************************************/
TessResultRenderer::TessResultRenderer(const char *outputbase,
const char* extension)
: file_extension_(extension),
title_(""), imagenum_(-1),
fout_(stdout),
next_(NULL),
happy_(true) {
if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
fout_ = fopen(outfile.string(), "wb");
if (fout_ == NULL) {
happy_ = false;
}
}
}
TessResultRenderer::~TessResultRenderer() {
if (fout_ != NULL) {
if (fout_ != stdout)
fclose(fout_);
else
clearerr(fout_);
}
delete next_;
}
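// For example, after r->insert(b) followed by r->insert(c) the chain is
// r -> c -> b; deleting r frees the whole chain because ~TessResultRenderer
// deletes next_.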
void TessResultRenderer::insert(TessResultRenderer* next) {
if (next == NULL) return;
TessResultRenderer* remainder = next_;
next_ = next;
if (remainder) {
while (next->next_ != NULL) {
next = next->next_;
}
next->next_ = remainder;
}
}
bool TessResultRenderer::BeginDocument(const char* title) {
if (!happy_) return false;
title_ = title;
imagenum_ = -1;
bool ok = BeginDocumentHandler();
if (next_) {
ok = next_->BeginDocument(title) && ok;
}
return ok;
}
bool TessResultRenderer::AddImage(TessBaseAPI* api, const char* jpgdata, int len) {
if (!happy_) return false;
++imagenum_;
bool ok = AddImageHandler(api, jpgdata, len);
if (next_) {
ok = next_->AddImage(api, jpgdata, len) && ok;
}
return ok;
}
bool TessResultRenderer::EndDocument() {
if (!happy_) return false;
bool ok = EndDocumentHandler();
if (next_) {
ok = next_->EndDocument() && ok;
}
return ok;
}
void TessResultRenderer::AppendString(const char* s) {
AppendData(s, strlen(s));
}
void TessResultRenderer::AppendData(const char* s, int len) {
int n = fwrite(s, 1, len, fout_);
if (n != len) happy_ = false;
}
bool TessResultRenderer::BeginDocumentHandler() {
return happy_;
}
bool TessResultRenderer::EndDocumentHandler() {
return happy_;
}
/**********************************************************************
* UTF8 Text Renderer interface implementation
**********************************************************************/
TessTextRenderer::TessTextRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "txt") {
}
bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
char* utf8 = api->GetUTF8Text();
if (utf8 == NULL) {
return false;
}
AppendString(utf8);
delete[] utf8;
bool pageBreak = false;
api->GetBoolVariable("include_page_breaks", &pageBreak);
const char* pageSeparator = api->GetStringVariable("page_separator");
if (pageBreak) {
AppendString(pageSeparator);
}
return true;
}
/**********************************************************************
* HOcr Text Renderer interface implementation
**********************************************************************/
TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "hocr") {
font_info_ = false;
}
TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
: TessResultRenderer(outputbase, "hocr") {
font_info_ = font_info;
}
bool TessHOcrRenderer::BeginDocumentHandler() {
AppendString(
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
"<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
"lang=\"en\">\n <head>\n <title>");
AppendString(title());
AppendString(
"</title>\n"
"<meta http-equiv=\"Content-Type\" content=\"text/html;"
"charset=utf-8\" />\n"
" <meta name='ocr-system' content='tesseract " TESSERACT_VERSION_STR
"' />\n"
" <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
" ocr_line ocrx_word");
if (font_info_)
AppendString(
" ocrp_lang ocrp_dir ocrp_font ocrp_fsize ocrp_wconf");
AppendString(
"'/>\n"
"</head>\n<body>\n");
return true;
}
bool TessHOcrRenderer::EndDocumentHandler() {
AppendString(" </body>\n</html>\n");
return true;
}
bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
char* hocr = api->GetHOCRText(imagenum());
if (hocr == NULL) return false;
AppendString(hocr);
delete[] hocr;
return true;
}
/**********************************************************************
* TSV Text Renderer interface implementation
**********************************************************************/
TessTsvRenderer::TessTsvRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "tsv") {
font_info_ = false;
}
TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
: TessResultRenderer(outputbase, "tsv") {
font_info_ = font_info;
}
bool TessTsvRenderer::BeginDocumentHandler() {
// Output TSV column headings
AppendString(
"level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
"num\tleft\ttop\twidth\theight\tconf\ttext\n");
return true;
}
bool TessTsvRenderer::EndDocumentHandler() { return true; }
bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
char* tsv = api->GetTSVText(imagenum());
if (tsv == NULL) return false;
AppendString(tsv);
delete[] tsv;
return true;
}
/**********************************************************************
* UNLV Text Renderer interface implementation
**********************************************************************/
TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "unlv") {
}
bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
char* unlv = api->GetUNLVText();
if (unlv == NULL) return false;
AppendString(unlv);
delete[] unlv;
return true;
}
/**********************************************************************
* BoxText Renderer interface implementation
**********************************************************************/
TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
: TessResultRenderer(outputbase, "box") {
}
bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
char* text = api->GetBoxText(imagenum());
if (text == NULL) return false;
AppendString(text);
delete[] text;
return true;
}
/**********************************************************************
* Osd Text Renderer interface implementation
**********************************************************************/
TessOsdRenderer::TessOsdRenderer(const char* outputbase)
: TessResultRenderer(outputbase, "osd") {}
bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
char* osd = api->GetOsdText(imagenum());
if (osd == NULL) return false;
AppendString(osd);
delete[] osd;
return true;
}
} // namespace tesseract

View File

@ -0,0 +1,271 @@
///////////////////////////////////////////////////////////////////////
// File: renderer.h
// Description: Rendering interface to inject into TessBaseAPI
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_API_RENDERER_H_
#define TESSERACT_API_RENDERER_H_
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include "genericvector.h"
#include "platform.h"
#include "publictypes.h"
namespace tesseract {
class TessBaseAPI;
/**
* Interface for rendering tesseract results into a document, such as text,
* HOCR or pdf. This class is abstract. Specific classes handle individual
* formats. This interface is then used to inject the renderer class into
* tesseract when processing images.
*
* For simplicity of implementation with tesseract version 3.01,
* the renderer contains document state that is cleared from document
* to document just as the TessBaseAPI is. This way the base API can just
* delegate its rendering functionality to injected renderers, and the
* renderers can manage the associated state needed for the specific formats
* in addition to the heuristics for producing it.
*/
class TESS_API TessResultRenderer {
public:
virtual ~TessResultRenderer();
// Takes ownership of the pointer, so it must be a new'd instance.
// Renderers aren't ordered; this appends the chain of the next parameter
// to the existing next() chain. The renderers should be unique across both lists.
void insert(TessResultRenderer* next);
// Returns the next renderer or NULL.
TessResultRenderer* next() { return next_; }
/**
* Starts a new document with the given title.
* This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/
bool BeginDocument(const char* title);
/**
* Adds the recognized text from the source image to the current document.
* Invalid if BeginDocument not yet called.
*
* Note that this API is a bit weird but is designed to fit into the
* current TessBaseAPI implementation where the api has lots of state
* information that we might want to add in.
*/
bool AddImage(TessBaseAPI * api, const char * jpgdata, int len);
/**
* Finishes the document and finalizes the output data
* Invalid if BeginDocument not yet called.
*/
bool EndDocument();
const char* file_extension() const { return file_extension_; }
const char* title() const { return title_.c_str(); }
/**
* Returns the index of the last image given to AddImage
* (i.e. the counter is incremented whether or not the image succeeded).
*
* This is always defined. Depending on where you are in the document
* lifecycle, it is the number of the current image, of the last image
* ended, or of the pages in the completed document.
* Will return -1 if a document was never started.
*/
int imagenum() const { return imagenum_; }
protected:
/**
* Called by concrete classes.
*
* outputbase is the name of the output file excluding
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
*
* extension indicates the file extension to be used for output
* files. For example "pdf" will produce a .pdf file, and "hocr"
* will produce .hocr files.
*/
TessResultRenderer(const char *outputbase,
const char* extension);
// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();
// This must be overridden to render the OCR'd results
virtual bool AddImageHandler(TessBaseAPI* api) = 0;
virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) = 0;
// Hook for specialized handling in EndDocument()
virtual bool EndDocumentHandler();
// Renderers can call this to append '\0' terminated strings into
// the output string returned by GetOutput.
// This method will grow the output buffer if needed.
void AppendString(const char* s);
// Renderers can call this to append binary byte sequences into
// the output string returned by GetOutput. Note that s is not necessarily
// '\0' terminated (and can contain '\0' within it).
// This method will grow the output buffer if needed.
void AppendData(const char* s, int len);
private:
const char* file_extension_; // standard extension for generated output
STRING title_; // title of document being rendered
int imagenum_; // index of last image added
FILE* fout_; // output file pointer
TessResultRenderer* next_; // Can link multiple renderers together
bool happy_; // I get grumpy when the disk fills up, etc.
};
/**
* Renders tesseract output into a plain UTF-8 text string
*/
class TESS_API TessTextRenderer : public TessResultRenderer {
public:
explicit TessTextRenderer(const char *outputbase);
protected:
virtual bool AddImageHandler(TessBaseAPI* api);
virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) { return false; }
};
/**
* Renders tesseract output into an hocr text string
*/
class TESS_API TessHOcrRenderer : public TessResultRenderer {
public:
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
explicit TessHOcrRenderer(const char *outputbase);
protected:
virtual bool BeginDocumentHandler();
virtual bool AddImageHandler(TessBaseAPI* api);
virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) { return false; }
virtual bool EndDocumentHandler();
private:
bool font_info_; // whether to print font information
};
/**
* Renders Tesseract output into a TSV string
*/
class TESS_API TessTsvRenderer : public TessResultRenderer {
public:
explicit TessTsvRenderer(const char* outputbase, bool font_info);
explicit TessTsvRenderer(const char* outputbase);
protected:
virtual bool BeginDocumentHandler();
virtual bool AddImageHandler(TessBaseAPI* api);
virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) { return false; }
virtual bool EndDocumentHandler();
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into searchable PDF
*/
class TESS_API TessPDFRenderer : public TessResultRenderer {
public:
// datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location.
TessPDFRenderer(const char* outputbase, const char* datadir);
TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly);
protected:
virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len);
virtual bool BeginDocumentHandler();
virtual bool AddImageHandler(TessBaseAPI* api);
virtual bool EndDocumentHandler();
private:
// We don't want to have every image in memory at once,
// so we store some metadata as we go along producing
// PDFs one page at a time. At the end, that metadata is
// used to make everything that isn't easily handled in a
// streaming fashion.
long int obj_; // counter for PDF objects
GenericVector<long int> offsets_; // offset of every PDF object in bytes
GenericVector<long int> pages_; // object number for every /Page object
const char *datadir_; // where to find the custom font
bool textonly_; // skip images if set
// Bookkeeping only. DIY = Do It Yourself.
void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data.
void AppendPDFObject(const char *data);
// Create the /Contents object for an entire page.
char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size);
static bool imageToPDFObj(const char* jpgdata, int len, long int objnum,
char **pdf_object, long int *pdf_object_size);
};
/**
* Renders tesseract output into a plain UTF-8 text string
*/
class TESS_API TessUnlvRenderer : public TessResultRenderer {
public:
explicit TessUnlvRenderer(const char *outputbase);
protected:
virtual bool AddImageHandler(TessBaseAPI* api);
virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) { return false; }
};
/**
* Renders tesseract output into a plain UTF-8 text string
*/
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
public:
explicit TessBoxTextRenderer(const char *outputbase);
protected:
virtual bool AddImageHandler(TessBaseAPI* api);
virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) { return false; }
};
/**
* Renders tesseract output into an osd text string
*/
class TESS_API TessOsdRenderer : public TessResultRenderer {
public:
explicit TessOsdRenderer(const char* outputbase);
protected:
virtual bool AddImageHandler(TessBaseAPI* api);
virtual bool AddImageHandler(TessBaseAPI* api, const char* jpgdata, int len) { return false; }
};
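/* Illustrative usage sketch (not part of this header): injecting a renderer
 * chain into TessBaseAPI from C++. Assumes baseapi.h is included; the output
 * base "out", the "eng" language and the tessdata path are assumptions. */
#if 0
inline bool RunOcrToTxtAndPdf(const char* image, const char* tessdata) {
  tesseract::TessBaseAPI api;
  if (api.Init(tessdata, "eng") != 0) return false;           // 0 == success
  tesseract::TessResultRenderer* renderer =
      new tesseract::TessTextRenderer("out");                 // produces out.txt
  renderer->insert(new tesseract::TessPDFRenderer("out", tessdata));  // produces out.pdf
  bool ok = api.ProcessPages(image, NULL, 0, renderer);       // every page goes to the chain
  delete renderer;                                            // also deletes the chained renderer
  return ok;
}
#endif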
} // namespace tesseract.
#endif // TESSERACT_API_RENDERER_H_

View File

@ -0,0 +1,546 @@
/**********************************************************************
* File: tessedit.cpp (Formerly tessedit.c)
* Description: Main program for merge of tess and editor.
* Author: Ray Smith
* Created: Tue Jan 07 15:21:46 GMT 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// Include automatically generated configuration file if running autoconf
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#include <iostream>
#include "allheaders.h"
#include "baseapi.h"
#include "basedir.h"
#include "dict.h"
#include "openclwrapper.h"
#include "osdetect.h"
#include "renderer.h"
#include "strngs.h"
#include "tprintf.h"
#include "StopWatch.h"
#if defined(HAVE_TIFFIO_H) && defined(_WIN32)
#include <tiffio.h>
static void Win32WarningHandler(const char* module, const char* fmt,
va_list ap) {
if (module != NULL) {
fprintf(stderr, "%s: ", module);
}
fprintf(stderr, "Warning, ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, ".\n");
}
#endif /* HAVE_TIFFIO_H && _WIN32 */
void PrintVersionInfo() {
char* versionStrP;
printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
versionStrP = getLeptonicaVersion();
printf(" %s\n", versionStrP);
lept_free(versionStrP);
versionStrP = getImagelibVersions();
printf(" %s\n", versionStrP);
lept_free(versionStrP);
#ifdef USE_OPENCL
cl_platform_id platform[4];
cl_uint num_platforms;
printf(" OpenCL info:\n");
if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) {
printf(" Found %u platform(s).\n", num_platforms);
for (unsigned n = 0; n < num_platforms; n++) {
char info[256];
if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) ==
CL_SUCCESS) {
printf(" Platform %u name: %s.\n", n + 1, info);
}
if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) ==
CL_SUCCESS) {
printf(" Version: %s.\n", info);
}
cl_device_id devices[2];
cl_uint num_devices;
if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices,
&num_devices) == CL_SUCCESS) {
printf(" Found %u device(s).\n", num_devices);
for (unsigned i = 0; i < num_devices; ++i) {
if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) ==
CL_SUCCESS) {
printf(" Device %u name: %s.\n", i + 1, info);
}
}
}
}
}
#endif
}
void PrintUsage(const char* program) {
printf(
"Usage:\n"
" %s --help | --help-psm | --help-oem | --version\n"
" %s --list-langs [--tessdata-dir PATH]\n"
" %s --print-parameters [options...] [configfile...]\n"
" %s imagename|stdin outputbase|stdout [options...] [configfile...]\n",
program, program, program, program);
}
void PrintHelpForPSM() {
const char* msg =
"Page segmentation modes:\n"
" 0 Orientation and script detection (OSD) only.\n"
" 1 Automatic page segmentation with OSD.\n"
" 2 Automatic page segmentation, but no OSD, or OCR.\n"
" 3 Fully automatic page segmentation, but no OSD. (Default)\n"
" 4 Assume a single column of text of variable sizes.\n"
" 5 Assume a single uniform block of vertically aligned text.\n"
" 6 Assume a single uniform block of text.\n"
" 7 Treat the image as a single text line.\n"
" 8 Treat the image as a single word.\n"
" 9 Treat the image as a single word in a circle.\n"
" 10 Treat the image as a single character.\n"
" 11 Sparse text. Find as much text as possible in no"
" particular order.\n"
" 12 Sparse text with OSD.\n"
" 13 Raw line. Treat the image as a single text line,\n"
"\t\t\tbypassing hacks that are Tesseract-specific.\n";
printf("%s", msg);
}
void PrintHelpForOEM() {
const char* msg =
"OCR Engine modes:\n"
" 0 Original Tesseract only.\n"
" 1 Cube only.\n"
" 2 Tesseract + cube.\n"
" 3 Default, based on what is available.\n";
printf("%s", msg);
}
void PrintHelpMessage(const char* program) {
PrintUsage(program);
const char* ocr_options =
"OCR options:\n"
" --tessdata-dir PATH Specify the location of tessdata path.\n"
" --user-words PATH Specify the location of user words file.\n"
" --user-patterns PATH Specify the location of user patterns file.\n"
" -l LANG[+LANG] Specify language(s) used for OCR.\n"
" -c VAR=VALUE Set value for config variables.\n"
" Multiple -c arguments are allowed.\n"
" --psm NUM Specify page segmentation mode.\n"
" --oem NUM Specify OCR Engine mode.\n"
"NOTE: These options must occur before any configfile.\n";
printf("\n%s\n", ocr_options);
PrintHelpForPSM();
PrintHelpForOEM();
const char* single_options =
"Single options:\n"
" -h, --help Show this help message.\n"
" --help-psm Show page segmentation modes.\n"
" --help-oem Show OCR Engine modes.\n"
" -v, --version Show version information.\n"
" --list-langs List available languages for tesseract engine.\n"
" --print-parameters Print tesseract parameters to stdout.\n";
printf("\n%s", single_options);
}
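// Illustrative invocations (file names are hypothetical) matching the help
// text above:
//   tesseract page.tif out -l eng --psm 6      # OCR a single text block
//   tesseract page.tif stdout --oem 0          # print recognized text only
//   tesseract --list-langs --tessdata-dir ./tessdata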
void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc,
char** argv) {
char opt1[256], opt2[255];
for (int i = 0; i < argc; i++) {
if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
strncpy(opt1, argv[i + 1], 255);
opt1[255] = '\0';
char* p = strchr(opt1, '=');
if (!p) {
fprintf(stderr, "Missing = in configvar assignment\n");
exit(1);
}
*p = 0;
strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255);
opt2[254] = 0;
++i;
if (!api->SetVariable(opt1, opt2)) {
fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
}
}
}
}
void PrintLangsList(tesseract::TessBaseAPI* api) {
GenericVector<STRING> languages;
api->GetAvailableLanguagesAsVector(&languages);
printf("List of available languages (%d):\n", languages.size());
for (int index = 0; index < languages.size(); ++index) {
STRING& string = languages[index];
printf("%s\n", string.string());
}
api->End();
}
void PrintBanner() {
tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
tesseract::TessBaseAPI::Version());
}
/**
* We have 2 possible sources of pagesegmode: a config file and
* the command line. For backwards compatibility reasons, the
* default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
* default for this program is tesseract::PSM_AUTO. We will let
* the config file take priority, so the command-line default
* can take priority over the tesseract default, so we use the
* value from the command line only if the retrieved mode
* is still tesseract::PSM_SINGLE_BLOCK, indicating no change
* in any config file. Therefore the only way to force
* tesseract::PSM_SINGLE_BLOCK is from the command line.
* It would be simpler if we could set the value before Init,
* but that doesn't work.
*/
void FixPageSegMode(tesseract::TessBaseAPI* api,
tesseract::PageSegMode pagesegmode) {
if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
api->SetPageSegMode(pagesegmode);
}
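// Example of the rule above: if no config file touched the mode,
// GetPageSegMode() still returns the library default PSM_SINGLE_BLOCK after
// Init, so the command-line / program default is applied; if a config file
// already set, say, PSM_SPARSE_TEXT, that value is kept.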
// NOTE: arg_i is used here to avoid ugly *i so many times in this function
void ParseArgs(const int argc, char** argv, const char** lang,
const char** image, const char** outputbase,
const char** datapath, bool* list_langs, bool* print_parameters,
GenericVector<STRING>* vars_vec,
GenericVector<STRING>* vars_values, int* arg_i,
tesseract::PageSegMode* pagesegmode,
tesseract::OcrEngineMode* enginemode) {
if (argc == 1) {
PrintHelpMessage(argv[0]);
exit(0);
}
if (argc == 2) {
if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
PrintHelpMessage(argv[0]);
exit(0);
}
if ((strcmp(argv[1], "--help-psm") == 0)) {
PrintHelpForPSM();
exit(0);
}
if ((strcmp(argv[1], "--help-oem") == 0)) {
PrintHelpForOEM();
exit(0);
}
if ((strcmp(argv[1], "-v") == 0) || (strcmp(argv[1], "--version") == 0)) {
PrintVersionInfo();
exit(0);
}
}
bool noocr = false;
int i = 1;
while (i < argc && (*outputbase == NULL || argv[i][0] == '-')) {
if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
*lang = argv[i + 1];
++i;
}
else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
*datapath = argv[i + 1];
++i;
}
else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
vars_vec->push_back("user_words_file");
vars_values->push_back(argv[i + 1]);
++i;
}
else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
vars_vec->push_back("user_patterns_file");
vars_values->push_back(argv[i + 1]);
++i;
}
else if (strcmp(argv[i], "--list-langs") == 0) {
noocr = true;
*list_langs = true;
}
else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) {
// The parameter -psm is deprecated and was replaced by --psm.
// It is still supported for compatibility reasons.
*pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
++i;
}
else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
*pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
++i;
}
else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
*enginemode = static_cast<tesseract::OcrEngineMode>(atoi(argv[i + 1]));
++i;
}
else if (strcmp(argv[i], "--print-parameters") == 0) {
noocr = true;
*print_parameters = true;
}
else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
// handled properly after api init
++i;
}
else if (*image == NULL) {
*image = argv[i];
}
else if (*outputbase == NULL) {
*outputbase = argv[i];
}
++i;
}
*arg_i = i;
if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
*list_langs = true;
noocr = true;
}
if (*outputbase == NULL && noocr == false) {
PrintHelpMessage(argv[0]);
exit(1);
}
}
void PreloadRenderers(
tesseract::TessBaseAPI* api,
tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
tesseract::PageSegMode pagesegmode, const char* outputbase) {
if (pagesegmode == tesseract::PSM_OSD_ONLY) {
renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
}
else {
bool b;
api->GetBoolVariable("tessedit_create_hocr", &b);
if (b) {
bool font_info;
api->GetBoolVariable("hocr_font_info", &font_info);
renderers->push_back(
new tesseract::TessHOcrRenderer(outputbase, font_info));
}
api->GetBoolVariable("tessedit_create_tsv", &b);
if (b) {
bool font_info;
api->GetBoolVariable("hocr_font_info", &font_info);
renderers->push_back(
new tesseract::TessTsvRenderer(outputbase, font_info));
}
api->GetBoolVariable("tessedit_create_pdf", &b);
if (b) {
bool textonly;
api->GetBoolVariable("textonly_pdf", &textonly);
renderers->push_back(new tesseract::TessPDFRenderer(
outputbase, api->GetDatapath(), textonly));
}
api->GetBoolVariable("tessedit_write_unlv", &b);
if (b) {
renderers->push_back(new tesseract::TessUnlvRenderer(outputbase));
}
api->GetBoolVariable("tessedit_create_boxfile", &b);
if (b) {
renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase));
}
api->GetBoolVariable("tessedit_create_txt", &b);
if (b || renderers->empty()) {
renderers->push_back(new tesseract::TessTextRenderer(outputbase));
}
}
if (!renderers->empty()) {
// Since the PointerVector auto-deletes, null-out the renderers that are
// added to the root, and leave the root in the vector.
for (int r = 1; r < renderers->size(); ++r) {
(*renderers)[0]->insert((*renderers)[r]);
(*renderers)[r] = NULL;
}
}
}
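// Illustrative outcome: with tessedit_create_hocr=1 and tessedit_create_pdf=1
// the vector first holds {TessHOcrRenderer, TessPDFRenderer}; the loop above
// chains the PDF renderer into the hOCR renderer and nulls its slot, so the
// caller only has to drive renderers[0].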
/**********************************************************************
* main()
*
**********************************************************************/
int main(int argc, char** argv) {
const char* lang = "osd";
const char* image = NULL;
const char* outputbase = NULL;
const char* datapath = NULL;
bool list_langs = false;
bool print_parameters = false;
int arg_i = 1;
tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO_OSD;
tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
/* main() calls functions like ParseArgs which call exit().
* This results in memory leaks if vars_vec and vars_values are
* declared as auto variables (destructor is not called then). */
static GenericVector<STRING> vars_vec;
static GenericVector<STRING> vars_values;
#ifdef NDEBUG
// Disable debugging and informational messages from Leptonica.
setMsgSeverity(L_SEVERITY_ERROR);
#endif
#if defined(HAVE_TIFFIO_H) && defined(_WIN32)
/* Show libtiff warnings on console (not in GUI). */
TIFFSetWarningHandler(Win32WarningHandler);
#endif /* HAVE_TIFFIO_H && _WIN32 */
ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs,
&print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode,
&enginemode);
bool banner = false;
if (outputbase != NULL && strcmp(outputbase, "-") &&
strcmp(outputbase, "stdout")) {
banner = true;
}
PERF_COUNT_START("Tesseract:main")
// Call GlobalDawgCache here to create the global DawgCache object before
// the TessBaseAPI object. This fixes the order of destructor calls:
// first TessBaseAPI must be destructed, DawgCache must be the last object.
tesseract::Dict::GlobalDawgCache();
// Avoid memory leak caused by auto variable when exit() is called.
static tesseract::TessBaseAPI api;
api.SetOutputName(outputbase);
int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
argc - arg_i, &vars_vec, &vars_values, false);
if (init_failed) {
fprintf(stderr, "Could not initialize tesseract.\n");
getchar();
return EXIT_FAILURE;
}
SetVariablesFromCLArgs(&api, argc, argv);
if (list_langs) {
PrintLangsList(&api);
getchar();
return EXIT_SUCCESS;
}
if (print_parameters) {
FILE* fout = stdout;
fprintf(stdout, "Tesseract parameters:\n");
api.PrintVariables(fout);
api.End();
getchar();
return EXIT_SUCCESS;
}
FixPageSegMode(&api, pagesegmode);
if (pagesegmode == tesseract::PSM_AUTO_OSD) {
int ret_val = EXIT_SUCCESS;
Pix* pixs = pixRead(image);
if (!pixs) {
fprintf(stderr, "Cannot open input file: %s\n", image);
getchar();
return 2;
}
api.SetImage(pixs);
tesseract::Orientation orientation;
tesseract::WritingDirection direction;
tesseract::TextlineOrder order;
float deskew_angle;
tesseract::PageIterator* it = api.AnalyseLayout();
if (it) {
StopWatch timer;
timer.reset();
it->Orientation(&orientation, &direction, &order, &deskew_angle);
tprintf(
"Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
"Deskew angle: %.4f\n time: %.4f\n img: %s",
orientation, direction, order, deskew_angle, timer.elapsed_s(), image);
getchar();
}
else {
ret_val = EXIT_FAILURE;
}
delete it;
pixDestroy(&pixs);
return ret_val;
}
// set in_training_mode to true when using one of these configs:
// ambigs.train, box.train, box.train.stderr, linebox, rebox
bool b = false;
bool in_training_mode =
(api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
(api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
(api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
// Avoid memory leak caused by auto variable when exit() is called.
static tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
if (in_training_mode) {
renderers.push_back(NULL);
}
else {
PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
}
if (!renderers.empty()) {
if (banner) PrintBanner();
bool succeed = api.ProcessPages(image, NULL, 0, renderers[0]);
if (!succeed) {
fprintf(stderr, "Error during processing.\n");
return EXIT_FAILURE;
}
}
PERF_COUNT_END
return EXIT_SUCCESS;
}

View File

@ -0,0 +1,126 @@
/**********************************************************************
* File: adaptions.cpp (Formerly adaptions.c)
* Description: Functions used to adapt to blobs already confidently
* identified
* Author: Chris Newton
* Created: Thu Oct 7 10:17:28 BST 1993
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifdef _MSC_VER
#pragma warning(disable:4244) // Conversion warnings
#pragma warning(disable:4305) // int/float warnings
#endif
#ifdef __UNIX__
#include <assert.h>
#endif
#include <ctype.h>
#include <string.h>
#include "tessbox.h"
#include "tessvars.h"
#include "memry.h"
#include "reject.h"
#include "control.h"
#include "stopper.h"
#include "tesseractclass.h"
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
namespace tesseract {
BOOL8 Tesseract::word_adaptable( //should we adapt?
WERD_RES *word,
uinT16 mode) {
if (tessedit_adaption_debug) {
tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
word->best_choice == NULL ? "" :
word->best_choice->unichar_string().string(),
word->best_choice->rating(), word->best_choice->certainty());
}
BOOL8 status = FALSE;
BITS16 flags(mode);
enum MODES
{
ADAPTABLE_WERD,
ACCEPTABLE_WERD,
CHECK_DAWGS,
CHECK_SPACES,
CHECK_ONE_ELL_CONFLICT,
CHECK_AMBIG_WERD
};
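// The mode argument is treated as a bit mask over the MODES enum above
// (assuming BITS16::bit(n) tests bit n of mode): e.g. mode = 0x03 runs only
// the ADAPTABLE_WERD and ACCEPTABLE_WERD checks, while mode = 0x27 also
// enables CHECK_DAWGS and CHECK_AMBIG_WERD.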
/*
0: NO adaption
*/
if (mode == 0) {
if (tessedit_adaption_debug) tprintf("adaption disabled\n");
return FALSE;
}
if (flags.bit(ADAPTABLE_WERD)) {
status |= word->tess_would_adapt; // result of Classify::AdaptableWord()
if (tessedit_adaption_debug && !status) {
tprintf("tess_would_adapt bit is false\n");
}
}
if (flags.bit(ACCEPTABLE_WERD)) {
status |= word->tess_accepted;
if (tessedit_adaption_debug && !status) {
tprintf("tess_accepted bit is false\n");
}
}
if (!status) { // If not set then
return FALSE; // ignore other checks
}
if (flags.bit(CHECK_DAWGS) &&
(word->best_choice->permuter() != SYSTEM_DAWG_PERM) &&
(word->best_choice->permuter() != FREQ_DAWG_PERM) &&
(word->best_choice->permuter() != USER_DAWG_PERM) &&
(word->best_choice->permuter() != NUMBER_PERM)) {
if (tessedit_adaption_debug) tprintf("word not in dawgs\n");
return FALSE;
}
if (flags.bit(CHECK_ONE_ELL_CONFLICT) && one_ell_conflict(word, FALSE)) {
if (tessedit_adaption_debug) tprintf("word has ell conflict\n");
return FALSE;
}
if (flags.bit(CHECK_SPACES) &&
(strchr(word->best_choice->unichar_string().string(), ' ') != NULL)) {
if (tessedit_adaption_debug) tprintf("word contains spaces\n");
return FALSE;
}
if (flags.bit(CHECK_AMBIG_WERD) &&
word->best_choice->dangerous_ambig_found()) {
if (tessedit_adaption_debug) tprintf("word is ambiguous\n");
return FALSE;
}
if (tessedit_adaption_debug) {
tprintf("returning status %d\n", status);
}
return status;
}
} // namespace tesseract

View File

@ -0,0 +1,814 @@
/**********************************************************************
* File: applybox.cpp (Formerly applybox.c)
* Description: Re segment rows according to box file data
* Author: Phil Cheatle
* Created: Wed Nov 24 09:11:23 GMT 1993
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifdef _MSC_VER
#pragma warning(disable:4244) // Conversion warnings
#endif
#include <ctype.h>
#include <string.h>
#ifdef __UNIX__
#include <assert.h>
#include <errno.h>
#endif
#include "allheaders.h"
#include "boxread.h"
#include "chopper.h"
#include "pageres.h"
#include "unichar.h"
#include "unicharset.h"
#include "tesseractclass.h"
#include "genericvector.h"
/** Max number of blobs to classify together in FindSegmentation. */
const int kMaxGroupSize = 4;
/// Max fraction of median allowed as deviation in xheight before switching
/// to median.
const double kMaxXHeightDeviationFraction = 0.125;
/**
* The box file is assumed to contain box definitions, one per line, of the
* following format for blob-level boxes:
* @verbatim
* <UTF8 str> <left> <bottom> <right> <top> <page id>
* @endverbatim
* and for word/line-level boxes:
* @verbatim
* WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
* @endverbatim
* NOTES:
* The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT.
*
* <page id> is 0-based, and the page number is used for multipage input (tiff).
*
* In the blob-level form, each line represents a recognizable unit, which may
* be several UTF-8 bytes, but there is a bounding box around each recognizable
* unit, and no classifier is needed to train in this mode (bootstrapping.)
*
* In the word/line-level form, the line begins with the literal "WordStr", and
* the bounding box bounds either a whole line or a whole word. The recognizable
* units in the word/line are listed after the # at the end of the line and
* are space delimited, ignoring any original spaces on the line.
* Eg.
* @verbatim
* word -> #w o r d
* multi word line -> #m u l t i w o r d l i n e
* @endverbatim
* The recognizable units must be space-delimited in order to allow multiple
* unicodes to be used for a single recognizable unit, eg Hindi.
*
* In this mode, the classifier must have been pre-trained with the desired
* character set, or it will not be able to find the character segmentations.
*/
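// Illustrative box file lines (coordinates are made up):
// blob level: s 26 34 48 70 0
// word/line level: WordStr 20 30 190 72 0 #s a m p l e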
namespace tesseract {
static void clear_any_old_text(BLOCK_LIST *block_list) {
BLOCK_IT block_it(block_list);
for (block_it.mark_cycle_pt();
!block_it.cycled_list(); block_it.forward()) {
ROW_IT row_it(block_it.data()->row_list());
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
WERD_IT word_it(row_it.data()->word_list());
for (word_it.mark_cycle_pt();
!word_it.cycled_list(); word_it.forward()) {
word_it.data()->set_text("");
}
}
}
}
// Applies the box file based on the image name fname, and resegments
// the words in the block_list (page), with:
// blob-mode: one blob per line in the box file, words as input.
// word/line-mode: one blob per space-delimited unit after the #, and one word
// per line in the box file. (See comment above for box file format.)
// If find_segmentation is true, (word/line mode) then the classifier is used
// to re-segment words/lines to match the space-delimited truth string for
// each box. In this case, the input box may be for a word or even a whole
// text line, and the output words will contain multiple blobs corresponding
// to the space-delimited input string.
// With find_segmentation false, no classifier is needed, but the chopper
// can still be used to correctly segment touching characters with the help
// of the input boxes.
// In the returned PAGE_RES, the WERD_RES are setup as they would be returned
// from normal classification, ie. with a word, chopped_word, rebuild_word,
// seam_array, denorm, box_word, and best_state, but NO best_choice or
// raw_choice, as they would require a UNICHARSET, which we aim to avoid.
// Instead, the correct_text member of WERD_RES is set, and this may be later
// converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords
// is not required before calling ApplyBoxTraining.
PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname,
bool find_segmentation,
BLOCK_LIST *block_list) {
GenericVector<TBOX> boxes;
GenericVector<STRING> texts, full_texts;
if (!ReadAllBoxes(applybox_page, true, fname, &boxes, &texts, &full_texts,
NULL)) {
return NULL; // Can't do it.
}
int box_count = boxes.size();
int box_failures = 0;
// Add an empty box and empty strings to the end as a sentinel.
boxes.push_back(TBOX());
texts.push_back(STRING());
full_texts.push_back(STRING());
// In word mode, we use the boxes to make a word for each box, but
// in blob mode we use the existing words and maximally chop them first.
PAGE_RES* page_res = find_segmentation ?
NULL : SetupApplyBoxes(boxes, block_list);
clear_any_old_text(block_list);
for (int i = 0; i < boxes.size() - 1; i++) {
bool foundit = false;
if (page_res != NULL) {
if (i == 0) {
foundit = ResegmentCharBox(page_res, NULL, boxes[i], boxes[i + 1],
full_texts[i].string());
}
else {
foundit = ResegmentCharBox(page_res, &boxes[i - 1], boxes[i],
boxes[i + 1], full_texts[i].string());
}
}
else {
foundit = ResegmentWordBox(block_list, boxes[i], boxes[i + 1],
texts[i].string());
}
if (!foundit) {
box_failures++;
ReportFailedBox(i, boxes[i], texts[i].string(),
"FAILURE! Couldn't find a matching blob");
}
}
if (page_res == NULL) {
// In word/line mode, we now maximally chop all the words and resegment
// them with the classifier.
page_res = SetupApplyBoxes(boxes, block_list);
ReSegmentByClassification(page_res);
}
if (applybox_debug > 0) {
tprintf("APPLY_BOXES:\n");
tprintf(" Boxes read from boxfile: %6d\n", box_count);
if (box_failures > 0)
tprintf(" Boxes failed resegmentation: %6d\n", box_failures);
}
TidyUp(page_res);
return page_res;
}
// Helper computes median xheight in the image.
static double MedianXHeight(BLOCK_LIST *block_list) {
BLOCK_IT block_it(block_list);
STATS xheights(0, block_it.data()->bounding_box().height());
for (block_it.mark_cycle_pt();
!block_it.cycled_list(); block_it.forward()) {
ROW_IT row_it(block_it.data()->row_list());
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
xheights.add(IntCastRounded(row_it.data()->x_height()), 1);
}
}
return xheights.median();
}
/// Any row xheight that is significantly different from the median is set
/// to the median.
void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
double median_xheight = MedianXHeight(block_list);
double max_deviation = kMaxXHeightDeviationFraction * median_xheight;
// Clamp each row's x-height to the median if it deviates too much.
BLOCK_IT b_it(block_list);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
BLOCK* block = b_it.data();
ROW_IT r_it(block->row_list());
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
ROW* row = r_it.data();
float diff = fabs(row->x_height() - median_xheight);
if (diff > max_deviation) {
if (applybox_debug) {
tprintf("row xheight=%g, but median xheight = %g\n",
row->x_height(), median_xheight);
}
row->set_x_height(static_cast<float>(median_xheight));
}
}
}
}
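// Worked example: with a median x-height of 40px, max_deviation is
// 0.125 * 40 = 5px, so a row measured at 48px is reset to 40 while a row
// at 43px is left unchanged.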
/// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
/// All fuzzy spaces are removed, and all the words are maximally chopped.
PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
BLOCK_LIST *block_list) {
PreenXHeights(block_list);
// Strip all fuzzy space markers to simplify the PAGE_RES.
BLOCK_IT b_it(block_list);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
BLOCK* block = b_it.data();
ROW_IT r_it(block->row_list());
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
ROW* row = r_it.data();
WERD_IT w_it(row->word_list());
for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
WERD* word = w_it.data();
if (word->cblob_list()->empty()) {
delete w_it.extract();
}
else {
word->set_flag(W_FUZZY_SP, false);
word->set_flag(W_FUZZY_NON, false);
}
}
}
}
PAGE_RES* page_res = new PAGE_RES(false, block_list, NULL);
PAGE_RES_IT pr_it(page_res);
WERD_RES* word_res;
while ((word_res = pr_it.word()) != NULL) {
MaximallyChopWord(boxes, pr_it.block()->block,
pr_it.row()->row, word_res);
pr_it.forward();
}
return page_res;
}
/// Tests the chopper by exhaustively running chop_one_blob.
/// The word_res will contain filled chopped_word, seam_array, denorm,
/// box_word and best_state for the maximally chopped word.
void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
BLOCK* block, ROW* row,
WERD_RES* word_res) {
if (!word_res->SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block)) {
word_res->CloneChoppedToRebuild();
return;
}
if (chop_debug) {
tprintf("Maximally chopping word at:");
word_res->word->bounding_box().print();
}
GenericVector<BLOB_CHOICE*> blob_choices;
ASSERT_HOST(!word_res->chopped_word->blobs.empty());
float rating = static_cast<float>(MAX_INT8);
for (int i = 0; i < word_res->chopped_word->NumBlobs(); ++i) {
// The rating and certainty are not quite arbitrary. Since
// select_blob_to_chop uses the worst certainty to choose, they all have
// to be different, so starting with MAX_INT8, subtract 1/8 for each blob
// in here, and then divide by e each time they are chopped, which
// should guarantee a set of unequal values for the whole tree of blobs
// produced, however much chopping is required. The chops are thus only
// limited by the ability of the chopper to find suitable chop points,
// and not by the value of the certainties.
BLOB_CHOICE* choice =
new BLOB_CHOICE(0, rating, -rating, -1, 0.0f, 0.0f, 0.0f, BCC_FAKE);
blob_choices.push_back(choice);
rating -= 0.125f;
}
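// E.g. with three blobs the initial ratings are 127.0, 126.875 and 126.75
// (MAX_INT8 minus 1/8 per blob); each chop below then divides the left
// piece's rating by e, keeping every value in the tree distinct.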
const double e = exp(1.0); // The base of natural logs.
int blob_number;
int right_chop_index = 0;
if (!assume_fixed_pitch_char_segment) {
// We only chop if the language is not fixed pitch like CJK.
SEAM* seam = NULL;
while ((seam = chop_one_blob(boxes, blob_choices, word_res,
&blob_number)) != NULL) {
word_res->InsertSeam(blob_number, seam);
BLOB_CHOICE* left_choice = blob_choices[blob_number];
rating = left_choice->rating() / e;
left_choice->set_rating(rating);
left_choice->set_certainty(-rating);
// combine confidence w/ serial #
BLOB_CHOICE* right_choice = new BLOB_CHOICE(++right_chop_index,
rating - 0.125f, -rating, -1,
0.0f, 0.0f, 0.0f, BCC_FAKE);
blob_choices.insert(right_choice, blob_number + 1);
}
}
word_res->CloneChoppedToRebuild();
word_res->FakeClassifyWord(blob_choices.size(), &blob_choices[0]);
}
/// Helper to compute the dispute resolution metric.
/// Disputed blob resolution. The aim is to give the blob to the most
/// appropriate boxfile box. Most of the time it is obvious, but if
/// two boxfile boxes overlap significantly it is not. If a small boxfile
/// box takes most of the blob, and a large boxfile box does too, then
/// we want the small boxfile box to get it, but if the small box
/// is much smaller than the blob, we don't want it to get it.
/// Details of the disputed blob resolution:
/// Given a box with area A, and a blob with area B, with overlap area C,
/// then the miss metric is (A-C)(B-C)/(AB) and the box with minimum
/// miss metric gets the blob.
static double BoxMissMetric(const TBOX& box1, const TBOX& box2) {
int overlap_area = box1.intersection(box2).area();
double miss_metric = box1.area() - overlap_area;
miss_metric /= box1.area();
miss_metric *= box2.area() - overlap_area;
miss_metric /= box2.area();
return miss_metric;
}
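// Worked example: a box of area A=100 and a blob of area B=400 with overlap
// C=80 give (100-80)*(400-80)/(100*400) = 0.16; if either region is fully
// contained in the other (C=A or C=B) the metric is 0, so full containment
// always wins the blob.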
/// Gather consecutive blobs that match the given box into the best_state
/// and corresponding correct_text.
///
/// Fights over which box owns which blobs are settled by pre-chopping and
/// applying the blobs to box or next_box with the least non-overlap.
/// @return false if the box was in error, which can only be caused by
/// failing to find an appropriate blob for a box.
///
/// This means that occasionally, blobs may be incorrectly segmented if the
/// chopper fails to find a suitable chop point.
bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
const TBOX& box, const TBOX& next_box,
const char* correct_text) {
if (applybox_debug > 1) {
tprintf("\nAPPLY_BOX: in ResegmentCharBox() for %s\n", correct_text);
}
PAGE_RES_IT page_res_it(page_res);
WERD_RES* word_res;
for (word_res = page_res_it.word(); word_res != NULL;
word_res = page_res_it.forward()) {
if (!word_res->box_word->bounding_box().major_overlap(box))
continue;
if (applybox_debug > 1) {
tprintf("Checking word box:");
word_res->box_word->bounding_box().print();
}
int word_len = word_res->box_word->length();
for (int i = 0; i < word_len; ++i) {
TBOX char_box = TBOX();
int blob_count = 0;
for (blob_count = 0; i + blob_count < word_len; ++blob_count) {
TBOX blob_box = word_res->box_word->BlobBox(i + blob_count);
if (!blob_box.major_overlap(box))
break;
if (word_res->correct_text[i + blob_count].length() > 0)
break; // Blob is claimed already.
double current_box_miss_metric = BoxMissMetric(blob_box, box);
double next_box_miss_metric = BoxMissMetric(blob_box, next_box);
if (applybox_debug > 2) {
tprintf("Checking blob:");
blob_box.print();
tprintf("Current miss metric = %g, next = %g\n",
current_box_miss_metric, next_box_miss_metric);
}
if (current_box_miss_metric > next_box_miss_metric)
break; // Blob is a better match for next box.
char_box += blob_box;
}
if (blob_count > 0) {
if (applybox_debug > 1) {
tprintf("Index [%d, %d) seem good.\n", i, i + blob_count);
}
if (!char_box.almost_equal(box, 3) &&
(box.x_gap(next_box) < -3 ||
(prev_box != NULL && prev_box->x_gap(box) < -3))) {
return false;
}
// We refine just the box_word, best_state and correct_text here.
// The rebuild_word is made in TidyUp.
// blob_count blobs are put together to match the box. Merge the
// box_word boxes, save the blob_count in the state and the text.
word_res->box_word->MergeBoxes(i, i + blob_count);
word_res->best_state[i] = blob_count;
word_res->correct_text[i] = correct_text;
if (applybox_debug > 2) {
tprintf("%d Blobs match: blob box:", blob_count);
word_res->box_word->BlobBox(i).print();
tprintf("Matches box:");
box.print();
tprintf("With next box:");
next_box.print();
}
// Eliminate the best_state and correct_text entries for the consumed
// blobs.
for (int j = 1; j < blob_count; ++j) {
word_res->best_state.remove(i + 1);
word_res->correct_text.remove(i + 1);
}
// Assume that no box spans multiple source words, so we are done with
// this box.
if (applybox_debug > 1) {
tprintf("Best state = ");
for (int j = 0; j < word_res->best_state.size(); ++j) {
tprintf("%d ", word_res->best_state[j]);
}
tprintf("\n");
tprintf("Correct text = [[ ");
for (int j = 0; j < word_res->correct_text.size(); ++j) {
tprintf("%s ", word_res->correct_text[j].string());
}
tprintf("]]\n");
}
return true;
}
}
}
if (applybox_debug > 0) {
tprintf("FAIL!\n");
}
return false; // Failure.
}
/// Consume all source blobs that strongly overlap the given box,
/// putting them into a new word, with the correct_text label.
/// Fights over which box owns which blobs are settled by
/// applying the blobs to box or next_box with the least non-overlap.
/// @return false if the box was in error, which can only be caused by
/// failing to find an overlapping blob for a box.
bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
const TBOX& box, const TBOX& next_box,
const char* correct_text) {
if (applybox_debug > 1) {
tprintf("\nAPPLY_BOX: in ResegmentWordBox() for %s\n", correct_text);
}
WERD* new_word = NULL;
BLOCK_IT b_it(block_list);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
BLOCK* block = b_it.data();
if (!box.major_overlap(block->bounding_box()))
continue;
ROW_IT r_it(block->row_list());
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
ROW* row = r_it.data();
if (!box.major_overlap(row->bounding_box()))
continue;
WERD_IT w_it(row->word_list());
for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
WERD* word = w_it.data();
if (applybox_debug > 2) {
tprintf("Checking word:");
word->bounding_box().print();
}
if (word->text() != NULL && word->text()[0] != '\0')
continue; // Ignore words that are already done.
if (!box.major_overlap(word->bounding_box()))
continue;
C_BLOB_IT blob_it(word->cblob_list());
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
blob_it.forward()) {
C_BLOB* blob = blob_it.data();
TBOX blob_box = blob->bounding_box();
if (!blob_box.major_overlap(box))
continue;
double current_box_miss_metric = BoxMissMetric(blob_box, box);
double next_box_miss_metric = BoxMissMetric(blob_box, next_box);
if (applybox_debug > 2) {
tprintf("Checking blob:");
blob_box.print();
tprintf("Current miss metric = %g, next = %g\n",
current_box_miss_metric, next_box_miss_metric);
}
if (current_box_miss_metric > next_box_miss_metric)
continue; // Blob is a better match for next box.
if (applybox_debug > 2) {
tprintf("Blob match: blob:");
blob_box.print();
tprintf("Matches box:");
box.print();
tprintf("With next box:");
next_box.print();
}
if (new_word == NULL) {
// Make a new word with a single blob.
new_word = word->shallow_copy();
new_word->set_text(correct_text);
w_it.add_to_end(new_word);
}
C_BLOB_IT new_blob_it(new_word->cblob_list());
new_blob_it.add_to_end(blob_it.extract());
}
}
}
}
if (new_word == NULL && applybox_debug > 0) tprintf("FAIL!\n");
return new_word != NULL;
}
/// Resegments the words by running the classifier in an attempt to find the
/// correct segmentation that produces the required string.
void Tesseract::ReSegmentByClassification(PAGE_RES* page_res) {
PAGE_RES_IT pr_it(page_res);
WERD_RES* word_res;
for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) {
WERD* word = word_res->word;
if (word->text() == NULL || word->text()[0] == '\0')
continue; // Ignore words that have no text.
// Convert the correct text to a vector of UNICHAR_ID
GenericVector<UNICHAR_ID> target_text;
if (!ConvertStringToUnichars(word->text(), &target_text)) {
tprintf("APPLY_BOX: FAILURE: can't find class_id for '%s'\n",
word->text());
pr_it.DeleteCurrentWord();
continue;
}
if (!FindSegmentation(target_text, word_res)) {
tprintf("APPLY_BOX: FAILURE: can't find segmentation for '%s'\n",
word->text());
pr_it.DeleteCurrentWord();
continue;
}
}
}
/// Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID.
/// @return false if an invalid UNICHAR_ID is encountered.
bool Tesseract::ConvertStringToUnichars(const char* utf8,
GenericVector<UNICHAR_ID>* class_ids) {
for (int step = 0; *utf8 != '\0'; utf8 += step) {
const char* next_space = strchr(utf8, ' ');
if (next_space == NULL)
next_space = utf8 + strlen(utf8);
step = next_space - utf8;
UNICHAR_ID class_id = unicharset.unichar_to_id(utf8, step);
if (class_id == INVALID_UNICHAR_ID) {
return false;
}
while (utf8[step] == ' ')
++step;
class_ids->push_back(class_id);
}
return true;
}
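// E.g. the truth string "w o r d" yields one UNICHAR_ID per space-delimited
// unit ('w', 'o', 'r', 'd'); any unit missing from the unicharset maps to
// INVALID_UNICHAR_ID and makes the whole conversion fail.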
/// Resegments the word to achieve the target_text from the classifier.
/// Returns false if the re-segmentation fails.
/// Uses brute-force combination of up to #kMaxGroupSize adjacent blobs, and
/// applies a full search on the classifier results to find the best classified
/// segmentation. As a compromise to obtain better recall, 1-1 ambiguity
/// substitutions ARE used.
bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
WERD_RES* word_res) {
// Classify all required combinations of blobs and save results in choices.
int word_length = word_res->box_word->length();
GenericVector<BLOB_CHOICE_LIST*>* choices =
new GenericVector<BLOB_CHOICE_LIST*>[word_length];
for (int i = 0; i < word_length; ++i) {
for (int j = 1; j <= kMaxGroupSize && i + j <= word_length; ++j) {
BLOB_CHOICE_LIST* match_result = classify_piece(
word_res->seam_array, i, i + j - 1, "Applybox",
word_res->chopped_word, word_res->blamer_bundle);
if (applybox_debug > 2) {
tprintf("%d+%d:", i, j);
print_ratings_list("Segment:", match_result, unicharset);
}
choices[i].push_back(match_result);
}
}
// Search the segmentation graph for the target text. Must be an exact
// match. Using wildcards makes it difficult to find the correct
// segmentation even when it is there.
word_res->best_state.clear();
GenericVector<int> search_segmentation;
float best_rating = 0.0f;
SearchForText(choices, 0, word_length, target_text, 0, 0.0f,
&search_segmentation, &best_rating, &word_res->best_state);
for (int i = 0; i < word_length; ++i)
choices[i].delete_data_pointers();
delete[] choices;
if (word_res->best_state.empty()) {
// Build the original segmentation and if it is the same length as the
// truth, assume it will do.
int blob_count = 1;
for (int s = 0; s < word_res->seam_array.size(); ++s) {
SEAM* seam = word_res->seam_array[s];
if (!seam->HasAnySplits()) {
word_res->best_state.push_back(blob_count);
blob_count = 1;
}
else {
++blob_count;
}
}
word_res->best_state.push_back(blob_count);
if (word_res->best_state.size() != target_text.size()) {
word_res->best_state.clear(); // No good. Original segmentation bad size.
return false;
}
}
word_res->correct_text.clear();
for (int i = 0; i < target_text.size(); ++i) {
word_res->correct_text.push_back(
STRING(unicharset.id_to_unichar(target_text[i])));
}
return true;
}
/// Recursive helper to find a match to the target_text (from text_index
/// position) in the choices (from choices_pos position).
/// @param choices is an array of GenericVectors, of length choices_length,
/// with each element representing a starting position in the word, and the
/// #GenericVector holding classification results for a sequence of consecutive
/// blobs, with index 0 being a single blob, index 1 being 2 blobs etc.
/// @param choices_pos
/// @param choices_length
/// @param target_text
/// @param text_index
/// @param rating
/// @param segmentation
/// @param best_rating
/// @param best_segmentation
void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
int choices_pos, int choices_length,
const GenericVector<UNICHAR_ID>& target_text,
int text_index,
float rating, GenericVector<int>* segmentation,
float* best_rating,
GenericVector<int>* best_segmentation) {
const UnicharAmbigsVector& table = getDict().getUnicharAmbigs().dang_ambigs();
for (int length = 1; length <= choices[choices_pos].size(); ++length) {
// Rating of matching choice or worst choice if no match.
float choice_rating = 0.0f;
// Find the corresponding best BLOB_CHOICE.
BLOB_CHOICE_IT choice_it(choices[choices_pos][length - 1]);
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
choice_it.forward()) {
BLOB_CHOICE* choice = choice_it.data();
choice_rating = choice->rating();
UNICHAR_ID class_id = choice->unichar_id();
if (class_id == target_text[text_index]) {
break;
}
// Search ambigs table.
if (class_id < table.size() && table[class_id] != NULL) {
AmbigSpec_IT spec_it(table[class_id]);
for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();
spec_it.forward()) {
const AmbigSpec *ambig_spec = spec_it.data();
// We'll only do 1-1.
if (ambig_spec->wrong_ngram[1] == INVALID_UNICHAR_ID &&
ambig_spec->correct_ngram_id == target_text[text_index])
break;
}
if (!spec_it.cycled_list())
break; // Found an ambig.
}
}
if (choice_it.cycled_list())
continue; // No match.
segmentation->push_back(length);
if (choices_pos + length == choices_length &&
text_index + 1 == target_text.size()) {
// This is a complete match. If the rating is good record a new best.
if (applybox_debug > 2) {
tprintf("Complete match, rating = %g, best=%g, seglength=%d, best=%d\n",
rating + choice_rating, *best_rating, segmentation->size(),
best_segmentation->size());
}
if (best_segmentation->empty() || rating + choice_rating < *best_rating) {
*best_segmentation = *segmentation;
*best_rating = rating + choice_rating;
}
}
else if (choices_pos + length < choices_length &&
text_index + 1 < target_text.size()) {
if (applybox_debug > 3) {
tprintf("Match found for %d=%s:%s, at %d+%d, recursing...\n",
target_text[text_index],
unicharset.id_to_unichar(target_text[text_index]),
choice_it.data()->unichar_id() == target_text[text_index]
? "Match" : "Ambig",
choices_pos, length);
}
SearchForText(choices, choices_pos + length, choices_length, target_text,
text_index + 1, rating + choice_rating, segmentation,
best_rating, best_segmentation);
if (applybox_debug > 3) {
tprintf("End recursion for %d=%s\n", target_text[text_index],
unicharset.id_to_unichar(target_text[text_index]));
}
}
segmentation->truncate(segmentation->size() - 1);
}
}
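// Illustrative trace: for a 3-blob word with target text "ab", choosing
// length 1 at choices_pos 0 (matching 'a') and then length 2 at
// choices_pos 1 (matching 'b') reaches choices_pos + length == choices_length
// on the last target character, so segmentation {1, 2} is recorded whenever
// its summed rating beats the best found so far.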
/// - Counts up the labelled words and the blobs within.
/// - Deletes all unused or emptied words, counting the unused ones.
/// - Resets W_BOL and W_EOL flags correctly.
/// - Builds the rebuild_word and rebuilds the box_word and the best_choice.
void Tesseract::TidyUp(PAGE_RES* page_res) {
int ok_blob_count = 0;
int bad_blob_count = 0;
int ok_word_count = 0;
int unlabelled_words = 0;
PAGE_RES_IT pr_it(page_res);
WERD_RES* word_res;
for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) {
int ok_in_word = 0;
int blob_count = word_res->correct_text.size();
WERD_CHOICE* word_choice = new WERD_CHOICE(word_res->uch_set, blob_count);
word_choice->set_permuter(TOP_CHOICE_PERM);
for (int c = 0; c < blob_count; ++c) {
if (word_res->correct_text[c].length() > 0) {
++ok_in_word;
}
// Since we only need a fake word_res->best_choice, the actual
// unichar_ids do not matter. Which is fortunate, since TidyUp()
// can be called while training Tesseract, at the stage where
// unicharset is not meaningful yet.
word_choice->append_unichar_id_space_allocated(
INVALID_UNICHAR_ID, word_res->best_state[c], 1.0f, -1.0f);
}
if (ok_in_word > 0) {
++ok_word_count;  // Count words that kept at least one labelled blob.
ok_blob_count += ok_in_word;
bad_blob_count += word_res->correct_text.size() - ok_in_word;
word_res->LogNewRawChoice(word_choice);
word_res->LogNewCookedChoice(1, false, word_choice);
}
else {
++unlabelled_words;
if (applybox_debug > 0) {
tprintf("APPLY_BOXES: Unlabelled word at :");
word_res->word->bounding_box().print();
}
pr_it.DeleteCurrentWord();
delete word_choice;
}
}
pr_it.restart_page();
for (; (word_res = pr_it.word()) != NULL; pr_it.forward()) {
// Denormalize back to a BoxWord.
word_res->RebuildBestState();
word_res->SetupBoxWord();
word_res->word->set_flag(W_BOL, pr_it.prev_row() != pr_it.row());
word_res->word->set_flag(W_EOL, pr_it.next_row() != pr_it.row());
}
if (applybox_debug > 0) {
tprintf(" Found %d good blobs.\n", ok_blob_count);
if (bad_blob_count > 0) {
tprintf(" Leaving %d unlabelled blobs in %d words.\n",
bad_blob_count, ok_word_count);
}
if (unlabelled_words > 0)
tprintf(" %d remaining unlabelled words deleted.\n", unlabelled_words);
}
}
/** Logs a bad box by line in the box file and box coords.*/
void Tesseract::ReportFailedBox(int boxfile_lineno, TBOX box,
const char *box_ch, const char *err_msg) {
tprintf("APPLY_BOXES: boxfile line %d/%s ((%d,%d),(%d,%d)): %s\n",
boxfile_lineno + 1, box_ch,
box.left(), box.bottom(), box.right(), box.top(), err_msg);
}
/** Creates a fake best_choice entry in each WERD_RES with the correct text.*/
void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
PAGE_RES_IT pr_it(page_res);
for (WERD_RES *word_res = pr_it.word(); word_res != NULL;
word_res = pr_it.forward()) {
WERD_CHOICE* choice = new WERD_CHOICE(word_res->uch_set,
word_res->correct_text.size());
for (int i = 0; i < word_res->correct_text.size(); ++i) {
// The part before the first space is the real ground truth, and the
// rest is the bounding box location and page number.
GenericVector<STRING> tokens;
word_res->correct_text[i].split(' ', &tokens);
UNICHAR_ID char_id = unicharset.unichar_to_id(tokens[0].string());
choice->append_unichar_id_space_allocated(char_id,
word_res->best_state[i],
0.0f, 0.0f);
}
word_res->ClearWordChoices();
word_res->LogNewRawChoice(choice);
word_res->LogNewCookedChoice(1, false, choice);
}
}
/// Calls #LearnWord to extract features for labelled blobs within each word.
/// Features are stored in an internal buffer.
void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) {
PAGE_RES_IT pr_it(page_res);
int word_count = 0;
for (WERD_RES *word_res = pr_it.word(); word_res != NULL;
word_res = pr_it.forward()) {
LearnWord(fontname.string(), word_res);
++word_count;
}
tprintf("Generated training data for %d words\n", word_count);
}
} // namespace tesseract

File diff suppressed because it is too large

View File

@ -0,0 +1,44 @@
/**********************************************************************
* File: control.h (Formerly control.h)
* Description: Module-independent matcher controller.
* Author: Ray Smith
* Created: Thu Apr 23 11:09:58 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
/**
* @file control.h
* Module-independent matcher controller.
*/
#ifndef CONTROL_H
#define CONTROL_H
#include "params.h"
#include "ocrblock.h"
#include "ratngs.h"
#include "statistc.h"
#include "pageres.h"
enum ACCEPTABLE_WERD_TYPE
{
AC_UNACCEPTABLE, ///< Unacceptable word
AC_LOWER_CASE, ///< ALL lower case
AC_UPPER_CASE, ///< ALL upper case
AC_INITIAL_CAP, ///< ALL but initial lc
AC_LC_ABBREV, ///< a.b.c.
AC_UC_ABBREV ///< A.B.C.
};
#endif

View File

@ -0,0 +1,440 @@
/******************************************************************
* File: cube_control.cpp
* Description: Tesseract class methods for invoking cube convolutional
* neural network word recognizer.
* Author: Raquel Romano
* Created: September 2009
*
* (C) Copyright 2009, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
**********************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#include "allheaders.h"
#include "cube_object.h"
#include "cube_reco_context.h"
#include "tesseractclass.h"
#include "tesseract_cube_combiner.h"
namespace tesseract {
/**
* @name convert_prob_to_tess_certainty
*
* Normalize a probability in the range [0.0, 1.0] to a tesseract
* certainty in the range [-20.0, 0.0]
*/
static float convert_prob_to_tess_certainty(float prob) {
return (prob - 1.0) * 20.0;
}
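// Mapping examples: prob 1.0 -> 0.0, prob 0.5 -> -10.0, prob 0.0 -> -20.0.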
/**
* @name char_box_to_tbox
*
* Create a TBOX from a character bounding box. If nonzero, the
* x_offset accounts for any additional padding of the word box that
* should be taken into account.
*
*/
TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
l_int32 left;
l_int32 top;
l_int32 width;
l_int32 height;
l_int32 right;
l_int32 bottom;
boxGetGeometry(char_box, &left, &top, &width, &height);
left += word_box.left() - x_offset;
right = left + width;
top = word_box.bottom() + word_box.height() - top;
bottom = top - height;
return TBOX(left, bottom, right, top);
}
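// Worked example (illustrative numbers): word_box left=200, bottom=100,
// height=50; a char_box with leptonica geometry left=5, top=10, width=20,
// height=30 and x_offset=0 yields TBOX left=205, right=225,
// top = 100 + 50 - 10 = 140 and bottom = 140 - 30 = 110.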
/**
* @name extract_cube_state
*
* Extract CharSamp objects and character bounding boxes from the
* CubeObject's state. The caller should free both structures.
*
*/
bool Tesseract::extract_cube_state(CubeObject* cube_obj,
int* num_chars,
Boxa** char_boxes,
CharSamp*** char_samples) {
if (!cube_obj) {
if (cube_debug_level > 0) {
tprintf("Cube WARNING (extract_cube_state): Invalid cube object "
"passed to extract_cube_state\n");
}
return false;
}
// Note that the CubeObject accessors return either the deslanted or the
// regular search object / beam search object, whichever was used in the
// last call to Recognize().
CubeSearchObject* cube_search_obj = cube_obj->SrchObj();
if (!cube_search_obj) {
if (cube_debug_level > 0) {
tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
"cube's search object in extract_cube_state.\n");
}
return false;
}
BeamSearch *beam_search_obj = cube_obj->BeamObj();
if (!beam_search_obj) {
if (cube_debug_level > 0) {
tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
"cube's beam search object in extract_cube_state.\n");
}
return false;
}
// Get the character samples and bounding boxes by backtracking
// through the beam search path
int best_node_index = beam_search_obj->BestPresortedNodeIndex();
*char_samples = beam_search_obj->BackTrack(
cube_search_obj, best_node_index, num_chars, NULL, char_boxes);
if (!*char_samples)
return false;
return true;
}
/**
* @name create_cube_box_word
*
* Fill the given BoxWord with boxes from character bounding
* boxes. The char_boxes have local coordinates w.r.t. the
* word bounding box, i.e., the left-most character bbox of each word
* has (0,0) left-top coord, but the BoxWord must be defined in page
* coordinates.
*/
bool Tesseract::create_cube_box_word(Boxa *char_boxes,
int num_chars,
TBOX word_box,
BoxWord* box_word) {
if (!box_word) {
if (cube_debug_level > 0) {
tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n");
}
return false;
}
// Find the x-coordinate of left-most char_box, which could be
// nonzero if the word image was padded before recognition took place.
int x_offset = -1;
for (int i = 0; i < num_chars; ++i) {
Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
if (x_offset < 0 || char_box->x < x_offset) {
x_offset = char_box->x;
}
boxDestroy(&char_box);
}
for (int i = 0; i < num_chars; ++i) {
Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
TBOX tbox = char_box_to_tbox(char_box, word_box, x_offset);
boxDestroy(&char_box);
box_word->InsertBox(i, tbox);
}
return true;
}
/**
* @name init_cube_objects
*
* Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner.
* Returns false if cube context could not be created or if load_combiner is
* true, but the combiner could not be loaded.
*/
bool Tesseract::init_cube_objects(bool load_combiner,
TessdataManager *tessdata_manager) {
ASSERT_HOST(cube_cntxt_ == NULL);
ASSERT_HOST(tess_cube_combiner_ == NULL);
// Create the cube context object
cube_cntxt_ = CubeRecoContext::Create(this, tessdata_manager, &unicharset);
if (cube_cntxt_ == NULL) {
if (cube_debug_level > 0) {
tprintf("Cube WARNING (Tesseract::init_cube_objects()): Failed to "
"instantiate CubeRecoContext\n");
}
return false;
}
// Create the combiner object and load the combiner net for target languages.
if (load_combiner) {
tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_);
if (!tess_cube_combiner_->LoadCombinerNet()) {
delete cube_cntxt_;
cube_cntxt_ = NULL;
delete tess_cube_combiner_;
tess_cube_combiner_ = NULL;
if (cube_debug_level > 0)
tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n");
return false;
}
}
return true;
}
/**
* @name run_cube_combiner
*
* Iterates through tesseract's results and calls cube on each word,
* combining the results with the existing tesseract result.
*/
void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
if (page_res == NULL || tess_cube_combiner_ == NULL)
return;
PAGE_RES_IT page_res_it(page_res);
// Iterate through the word results and call cube on each word.
for (page_res_it.restart_page(); page_res_it.word() != NULL;
page_res_it.forward()) {
BLOCK* block = page_res_it.block()->block;
if (block->poly_block() != NULL && !block->poly_block()->IsText())
continue; // Don't deal with non-text blocks.
WERD_RES* word = page_res_it.word();
// Skip cube entirely if tesseract's certainty is greater than threshold.
int combiner_run_thresh = convert_prob_to_tess_certainty(
cube_cntxt_->Params()->CombinerRunThresh());
if (word->best_choice->certainty() >= combiner_run_thresh) {
continue;
}
// Use the same language as Tesseract used for the word.
Tesseract* lang_tess = word->tesseract;
// Setup a trial WERD_RES in which to classify with cube.
WERD_RES cube_word;
cube_word.InitForRetryRecognition(*word);
cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(),
OEM_CUBE_ONLY,
NULL, false, false, false,
page_res_it.row()->row,
page_res_it.block()->block);
CubeObject *cube_obj = lang_tess->cube_recognize_word(
page_res_it.block()->block, &cube_word);
if (cube_obj != NULL)
lang_tess->cube_combine_word(cube_obj, &cube_word, word);
delete cube_obj;
}
}
/**
* @name cube_word_pass1
*
* Recognizes a single word using (only) cube. Compatible with
* Tesseract's classify_word_pass1/classify_word_pass2.
*/
void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
CubeObject *cube_obj = cube_recognize_word(block, word);
delete cube_obj;
}
/**
* @name cube_recognize_word
*
* Cube recognizer to recognize a single word as with classify_word_pass1
* but also returns the cube object in case the combiner is needed.
*/
CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) {
if (!cube_binary_ || !cube_cntxt_) {
if (cube_debug_level > 0 && !cube_binary_)
tprintf("Tesseract::run_cube(): NULL binary image.\n");
word->SetupFake(unicharset);
return NULL;
}
TBOX word_box = word->word->bounding_box();
if (block != NULL && (block->re_rotation().x() != 1.0f ||
block->re_rotation().y() != 0.0f)) {
// TODO(rays) We have to rotate the bounding box to get the true coords.
// This will be achieved in the future via DENORM.
// In the mean time, cube can't process this word.
if (cube_debug_level > 0) {
tprintf("Cube can't process rotated word at:");
word_box.print();
}
word->SetupFake(unicharset);
return NULL;
}
CubeObject* cube_obj = new tesseract::CubeObject(
cube_cntxt_, cube_binary_, word_box.left(),
pixGetHeight(cube_binary_) - word_box.top(),
word_box.width(), word_box.height());
if (!cube_recognize(cube_obj, block, word)) {
delete cube_obj;
return NULL;
}
return cube_obj;
}
/**
* @name cube_combine_word
*
* Combines the cube and tesseract results for a single word, leaving the
* result in tess_word.
*/
void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
WERD_RES* tess_word) {
float combiner_prob = tess_cube_combiner_->CombineResults(tess_word,
cube_obj);
// If combiner probability is greater than tess/cube combiner
// classifier threshold, i.e. tesseract wins, then just return the
// tesseract result unchanged, as the combiner knows nothing about how
// correct the answer is. If cube and tesseract agree, then improve the
// scores before returning.
WERD_CHOICE* tess_best = tess_word->best_choice;
WERD_CHOICE* cube_best = cube_word->best_choice;
if (cube_debug_level || classify_debug_level) {
tprintf("Combiner prob = %g vs threshold %g\n",
combiner_prob, cube_cntxt_->Params()->CombinerClassifierThresh());
}
if (combiner_prob >=
cube_cntxt_->Params()->CombinerClassifierThresh()) {
if (tess_best->unichar_string() == cube_best->unichar_string()) {
// Cube and tess agree, so improve the scores.
tess_best->set_rating(tess_best->rating() / 2);
tess_best->set_certainty(tess_best->certainty() / 2);
}
return;
}
// Cube wins.
// It is better for the language combiner to have all tesseract scores,
// so put them in the cube result.
cube_best->set_rating(tess_best->rating());
cube_best->set_certainty(tess_best->certainty());
if (cube_debug_level || classify_debug_level) {
tprintf("Cube INFO: tesseract result replaced by cube: %s -> %s\n",
tess_best->unichar_string().string(),
cube_best->unichar_string().string());
}
tess_word->ConsumeWordResults(cube_word);
}
/**
* @name cube_recognize
*
* Call cube on the current word, and write the result to word.
* Sets up a fake result and returns false if something goes wrong.
*/
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
WERD_RES *word) {
// Run cube
WordAltList *cube_alt_list = cube_obj->RecognizeWord();
if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
if (cube_debug_level > 0) {
tprintf("Cube returned nothing for word at:");
word->word->bounding_box().print();
}
word->SetupFake(unicharset);
return false;
}
// Get cube's best result and its probability, mapped to tesseract's
// certainty range
char_32 *cube_best_32 = cube_alt_list->Alt(0);
double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
string cube_best_str;
CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);
// Retrieve Cube's character bounding boxes and CharSamples,
// corresponding to the most recent call to RecognizeWord().
Boxa *char_boxes = NULL;
CharSamp **char_samples = NULL;
int num_chars;
if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)
&& cube_debug_level > 0) {
tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
"cube state.\n");
word->SetupFake(unicharset);
return false;
}
// Convert cube's character bounding boxes to a BoxWord.
BoxWord cube_box_word;
TBOX tess_word_box = word->word->bounding_box();
if (word->denorm.block() != NULL)
tess_word_box.rotate(word->denorm.block()->re_rotation());
bool box_word_success = create_cube_box_word(char_boxes, num_chars,
tess_word_box,
&cube_box_word);
boxaDestroy(&char_boxes);
if (!box_word_success) {
if (cube_debug_level > 0) {
tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
"create cube BoxWord\n");
}
word->SetupFake(unicharset);
return false;
}
// Fill tesseract result's fields with cube results
fill_werd_res(cube_box_word, cube_best_str.c_str(), word);
// Create cube's best choice.
BLOB_CHOICE** choices = new BLOB_CHOICE*[num_chars];
for (int i = 0; i < num_chars; ++i) {
UNICHAR_ID uch_id =
cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel());
choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty,
-1, 0.0f, 0.0f, 0.0f, BCC_STATIC_CLASSIFIER);
}
word->FakeClassifyWord(num_chars, choices);
// within a word, cube recognizes the word in reading order.
word->best_choice->set_unichars_in_script_order(true);
delete[] choices;
delete[] char_samples;
// Some sanity checks
ASSERT_HOST(word->best_choice->length() == word->reject_map.length());
if (cube_debug_level || classify_debug_level) {
tprintf("Cube result: %s r=%g, c=%g\n",
word->best_choice->unichar_string().string(),
word->best_choice->rating(),
word->best_choice->certainty());
}
return true;
}
/**
* @name fill_werd_res
*
* Fill Tesseract's word result fields with cube's.
*
*/
void Tesseract::fill_werd_res(const BoxWord& cube_box_word,
const char* cube_best_str,
WERD_RES* tess_werd_res) {
delete tess_werd_res->box_word;
tess_werd_res->box_word = new BoxWord(cube_box_word);
tess_werd_res->box_word->ClipToOriginalWord(tess_werd_res->denorm.block(),
tess_werd_res->word);
// Fill text and remaining fields
tess_werd_res->word->set_text(cube_best_str);
tess_werd_res->tess_failed = FALSE;
tess_werd_res->tess_accepted = tess_acceptable_word(tess_werd_res);
// There is no output word, so we can't call AdaptableWord, but then I don't
// think we need to. Fudge the result with accepted.
tess_werd_res->tess_would_adapt = tess_werd_res->tess_accepted;
// Set word to done, i.e., ignore all of tesseract's tests for rejection
tess_werd_res->done = tess_werd_res->tess_accepted;
}
} // namespace tesseract
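A rough sketch of how the routines above fit together for one word. The CubeObject construction and the surrounding driver loop are not part of this excerpt, so cube_obj, block and word below are assumptions about the caller rather than code from this file.
// Hypothetical caller inside Tesseract, shown only to make the control flow
// explicit; cube_obj wraps the word image, word is the Tesseract result.
if (cube_recognize(cube_obj, block, word)) {
  // word now carries cube's best choice (filled in by fill_werd_res above);
  // the combiner at the top of this excerpt then either halves the Tesseract
  // scores when both engines agree, or lets cube's result replace Tesseract's.
} else {
  // Cube produced nothing usable and word was set up as a fake result.
}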

View File

@ -0,0 +1,184 @@
/**********************************************************************
* File: cube_reco_context.cpp
* Description: Implementation of the Cube Recognition Context Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <string>
#include <limits.h>
#include "cube_reco_context.h"
#include "classifier_factory.h"
#include "cube_tuning_params.h"
#include "dict.h"
#include "feature_bmp.h"
#include "tessdatamanager.h"
#include "tesseractclass.h"
#include "tess_lang_model.h"
namespace tesseract {
/**
* Instantiate a CubeRecoContext object using a Tesseract object.
* CubeRecoContext will not take ownership of tess_obj, but will
* record the pointer to it and will make use of various Tesseract
* components (language model, flags, etc). Thus the caller should
* keep tess_obj alive so long as the instantiated CubeRecoContext is used.
*/
CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) {
tess_obj_ = tess_obj;
lang_ = "";
loaded_ = false;
lang_mod_ = NULL;
params_ = NULL;
char_classifier_ = NULL;
char_set_ = NULL;
word_size_model_ = NULL;
char_bigrams_ = NULL;
word_unigrams_ = NULL;
noisy_input_ = false;
size_normalization_ = false;
}
CubeRecoContext::~CubeRecoContext() {
delete char_classifier_;
char_classifier_ = NULL;
delete word_size_model_;
word_size_model_ = NULL;
delete char_set_;
char_set_ = NULL;
delete char_bigrams_;
char_bigrams_ = NULL;
delete word_unigrams_;
word_unigrams_ = NULL;
delete lang_mod_;
lang_mod_ = NULL;
delete params_;
params_ = NULL;
}
/**
* Returns the path of the data files by looking up the TESSDATA_PREFIX
* environment variable and appending a "tessdata" directory to it
*/
bool CubeRecoContext::GetDataFilePath(string *path) const {
*path = tess_obj_->datadir.string();
return true;
}
/**
* The object initialization function that loads all the necessary
* components of a RecoContext. TessdataManager is used to load the
* data from [lang].traineddata file. If TESSDATA_CUBE_UNICHARSET
* component is present, Cube will be instantiated with the unicharset
* specified in this component and the corresponding dictionary
* (TESSDATA_CUBE_SYSTEM_DAWG), and will map Cube's unicharset to
* Tesseract's. Otherwise, TessdataManager will assume that Cube will
* be using Tesseract's unicharset and dawgs, and will load the
* unicharset from the TESSDATA_UNICHARSET component and will load the
* dawgs from TESSDATA_*_DAWG components.
*/
bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
UNICHARSET *tess_unicharset) {
ASSERT_HOST(tess_obj_ != NULL);
tess_unicharset_ = tess_unicharset;
string data_file_path;
// Get the data file path.
if (GetDataFilePath(&data_file_path) == false) {
fprintf(stderr, "Unable to get data file path\n");
return false;
}
// Get the language from the Tesseract object.
lang_ = tess_obj_->lang.string();
// Create the char set.
if ((char_set_ =
CharSet::Create(tessdata_manager, tess_unicharset)) == NULL) {
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
"CharSet\n");
return false;
}
// Create the language model.
string lm_file_name = data_file_path + lang_ + ".cube.lm";
string lm_params;
if (!CubeUtils::ReadFileToString(lm_file_name, &lm_params)) {
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read cube "
"language model params from %s\n", lm_file_name.c_str());
return false;
}
lang_mod_ = new TessLangModel(lm_params, data_file_path,
tess_obj_->getDict().load_system_dawg,
tessdata_manager, this);
// Create the optional char bigrams object.
char_bigrams_ = CharBigrams::Create(data_file_path, lang_);
// Create the optional word unigrams object.
word_unigrams_ = WordUnigrams::Create(data_file_path, lang_);
// Create the optional size model.
word_size_model_ = WordSizeModel::Create(data_file_path, lang_,
char_set_, Contextual());
// Load tuning params.
params_ = CubeTuningParams::Create(data_file_path, lang_);
if (params_ == NULL) {
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read "
"CubeTuningParams from %s\n", data_file_path.c_str());
return false;
}
// Create the char classifier.
char_classifier_ = CharClassifierFactory::Create(data_file_path, lang_,
lang_mod_, char_set_,
params_);
if (char_classifier_ == NULL) {
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
"CharClassifierFactory object from %s\n", data_file_path.c_str());
return false;
}
loaded_ = true;
return true;
}
/** Creates a CubeRecoContext object using a tesseract object */
CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj,
TessdataManager *tessdata_manager,
UNICHARSET *tess_unicharset) {
// create the object
CubeRecoContext *cntxt = new CubeRecoContext(tess_obj);
// load the necessary components
if (cntxt->Load(tessdata_manager, tess_unicharset) == false) {
fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init "
"CubeRecoContext object\n");
delete cntxt;
return NULL;
}
// success
return cntxt;
}
} // namespace tesseract
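A minimal construction sketch based only on the Create() signature above; the tess, tessdata_manager and tess_unicharset arguments stand for objects the caller is assumed to have initialized elsewhere.
#include "cube_reco_context.h"
// Hypothetical helper; all three arguments are assumed to be set up already.
tesseract::CubeRecoContext *MakeCubeContext(tesseract::Tesseract *tess,
    tesseract::TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset) {
  tesseract::CubeRecoContext *cube_cntxt =
      tesseract::CubeRecoContext::Create(tess, tessdata_manager, tess_unicharset);
  // On failure Create() has already logged the reason to stderr and returned
  // NULL; on success the caller owns the returned context (but not tess) and
  // must delete it when finished with it.
  return cube_cntxt;
}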

View File

@ -0,0 +1,157 @@
/**********************************************************************
* File: cube_reco_context.h
* Description: Declaration of the Cube Recognition Context Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
// (or a thread) would create one CubeRecoContext object per language.
// The CubeRecoContext object also provides methods to get and set the
// different attributes of the Cube OCR Engine.
#ifndef CUBE_RECO_CONTEXT_H
#define CUBE_RECO_CONTEXT_H
#include <string>
#include "neural_net.h"
#include "lang_model.h"
#include "classifier_base.h"
#include "feature_base.h"
#include "char_set.h"
#include "word_size_model.h"
#include "char_bigrams.h"
#include "word_unigrams.h"
namespace tesseract {
class Tesseract;
class TessdataManager;
class CubeRecoContext {
public:
// Reading order enum type
enum ReadOrder {
L2R,
R2L
};
// Instantiate using a Tesseract object
CubeRecoContext(Tesseract *tess_obj);
~CubeRecoContext();
// accessor functions
inline const string & Lang() const { return lang_; }
inline CharSet *CharacterSet() const { return char_set_; }
const UNICHARSET *TessUnicharset() const { return tess_unicharset_; }
inline CharClassifier *Classifier() const { return char_classifier_; }
inline WordSizeModel *SizeModel() const { return word_size_model_; }
inline CharBigrams *Bigrams() const { return char_bigrams_; }
inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
inline TuningParams *Params() const { return params_; }
inline LangModel *LangMod() const { return lang_mod_; }
// the reading order of the language
inline ReadOrder ReadingOrder() const {
return ((lang_ == "ara") ? R2L : L2R);
}
// does the language support case
inline bool HasCase() const {
return (lang_ != "ara" && lang_ != "hin");
}
inline bool Cursive() const {
return (lang_ == "ara");
}
inline bool HasItalics() const {
return (lang_ != "ara" && lang_ != "hin");
}
inline bool Contextual() const {
return (lang_ == "ara");
}
// RecoContext runtime flags accessor functions
inline bool SizeNormalization() const { return size_normalization_; }
inline bool NoisyInput() const { return noisy_input_; }
inline bool OOD() const { return lang_mod_->OOD(); }
inline bool Numeric() const { return lang_mod_->Numeric(); }
inline bool WordList() const { return lang_mod_->WordList(); }
inline bool Punc() const { return lang_mod_->Punc(); }
inline bool CaseSensitive() const {
return char_classifier_->CaseSensitive();
}
inline void SetSizeNormalization(bool size_normalization) {
size_normalization_ = size_normalization;
}
inline void SetNoisyInput(bool noisy_input) {
noisy_input_ = noisy_input;
}
inline void SetOOD(bool ood_enabled) {
lang_mod_->SetOOD(ood_enabled);
}
inline void SetNumeric(bool numeric_enabled) {
lang_mod_->SetNumeric(numeric_enabled);
}
inline void SetWordList(bool word_list_enabled) {
lang_mod_->SetWordList(word_list_enabled);
}
inline void SetPunc(bool punc_enabled) {
lang_mod_->SetPunc(punc_enabled);
}
inline void SetCaseSensitive(bool case_sensitive) {
char_classifier_->SetCaseSensitive(case_sensitive);
}
inline tesseract::Tesseract *TesseractObject() const {
return tess_obj_;
}
// Returns the path of the data files
bool GetDataFilePath(string *path) const;
// Creates a CubeRecoContext object using a tesseract object. Data
// files are loaded via the tessdata_manager, and the tesseract
// unicharset is provided in order to map Cube's unicharset to
// Tesseract's in the case where the two unicharsets differ.
static CubeRecoContext *Create(Tesseract *tess_obj,
TessdataManager *tessdata_manager,
UNICHARSET *tess_unicharset);
private:
bool loaded_;
string lang_;
CharSet *char_set_;
UNICHARSET *tess_unicharset_;
WordSizeModel *word_size_model_;
CharClassifier *char_classifier_;
CharBigrams *char_bigrams_;
WordUnigrams *word_unigrams_;
TuningParams *params_;
LangModel *lang_mod_;
Tesseract *tess_obj_; // CubeRecoContext does not own this pointer
bool size_normalization_;
bool noisy_input_;
// Loads and initializes all the necessary components of a
// CubeRecoContext. See .cpp for more details.
bool Load(TessdataManager *tessdata_manager,
UNICHARSET *tess_unicharset);
};
}
#endif // CUBE_RECO_CONTEXT_H
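A short usage sketch for the runtime-flag setters declared above; ConfigureForDigits is a hypothetical helper, and cntxt is assumed to point at a context created and loaded via CubeRecoContext::Create().
#include "cube_reco_context.h"
// Hypothetical helper that only exercises accessors declared in this header.
void ConfigureForDigits(tesseract::CubeRecoContext *cntxt) {
  cntxt->SetNumeric(true);    // enable the language model's numeric mode
  cntxt->SetWordList(false);  // no user-supplied word list
  cntxt->SetPunc(true);       // punctuation stays enabled
  // ReadingOrder() reports R2L only for Arabic, so most callers can assume L2R.
}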

View File

@ -0,0 +1,134 @@
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
///////////////////////////////////////////////////////////////////////
// File: cubeclassifier.cpp
// Description: Cube implementation of a ShapeClassifier.
// Author: Ray Smith
// Created: Wed Nov 23 10:39:45 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "cubeclassifier.h"
#include "char_altlist.h"
#include "char_set.h"
#include "cube_object.h"
#include "cube_reco_context.h"
#include "tessclassifier.h"
#include "tesseractclass.h"
#include "trainingsample.h"
#include "unicharset.h"
namespace tesseract {
CubeClassifier::CubeClassifier(tesseract::Tesseract* tesseract)
: cube_cntxt_(tesseract->GetCubeRecoContext()),
shape_table_(*tesseract->shape_table()) {
}
CubeClassifier::~CubeClassifier() {
}
/// Classifies the given [training] sample, writing to results.
/// See ShapeClassifier for a full description.
int CubeClassifier::UnicharClassifySample(
const TrainingSample& sample, Pix* page_pix, int debug,
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
results->clear();
if (page_pix == NULL) return 0;
ASSERT_HOST(cube_cntxt_ != NULL);
const TBOX& char_box = sample.bounding_box();
CubeObject* cube_obj = new tesseract::CubeObject(
cube_cntxt_, page_pix, char_box.left(),
pixGetHeight(page_pix) - char_box.top(),
char_box.width(), char_box.height());
CharAltList* alt_list = cube_obj->RecognizeChar();
if (alt_list != NULL) {
alt_list->Sort();
CharSet* char_set = cube_cntxt_->CharacterSet();
for (int i = 0; i < alt_list->AltCount(); ++i) {
// Convert cube representation to a shape_id.
int alt_id = alt_list->Alt(i);
int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
if (unichar_id >= 0)
results->push_back(UnicharRating(unichar_id, alt_list->AltProb(i)));
}
delete alt_list;
}
delete cube_obj;
return results->size();
}
/** Provides access to the ShapeTable that this classifier works with. */
const ShapeTable* CubeClassifier::GetShapeTable() const {
return &shape_table_;
}
CubeTessClassifier::CubeTessClassifier(tesseract::Tesseract* tesseract)
: cube_cntxt_(tesseract->GetCubeRecoContext()),
shape_table_(*tesseract->shape_table()),
pruner_(new TessClassifier(true, tesseract)) {
}
CubeTessClassifier::~CubeTessClassifier() {
delete pruner_;
}
/// Classifies the given [training] sample, writing to results.
/// See ShapeClassifier for a full description.
int CubeTessClassifier::UnicharClassifySample(
const TrainingSample& sample, Pix* page_pix, int debug,
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
int num_results = pruner_->UnicharClassifySample(sample, page_pix, debug,
keep_this, results);
if (page_pix == NULL) return num_results;
ASSERT_HOST(cube_cntxt_ != NULL);
const TBOX& char_box = sample.bounding_box();
CubeObject* cube_obj = new tesseract::CubeObject(
cube_cntxt_, page_pix, char_box.left(),
pixGetHeight(page_pix) - char_box.top(),
char_box.width(), char_box.height());
CharAltList* alt_list = cube_obj->RecognizeChar();
CharSet* char_set = cube_cntxt_->CharacterSet();
if (alt_list != NULL) {
for (int r = 0; r < num_results; ++r) {
// Get the best cube probability of the unichar in the result.
double best_prob = 0.0;
for (int i = 0; i < alt_list->AltCount(); ++i) {
int alt_id = alt_list->Alt(i);
int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
if (unichar_id == (*results)[r].unichar_id &&
alt_list->AltProb(i) > best_prob) {
best_prob = alt_list->AltProb(i);
}
}
(*results)[r].rating = best_prob;
}
delete alt_list;
// Re-sort by rating.
results->sort(&UnicharRating::SortDescendingRating);
}
delete cube_obj;
return results->size();
}
/** Provides access to the ShapeTable that this classifier works with. */
const ShapeTable* CubeTessClassifier::GetShapeTable() const {
return &shape_table_;
}
} // namespace tesseract
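A usage sketch for CubeClassifier based on the UnicharClassifySample() signature above; tess, sample and page_pix stand for an initialized Tesseract instance, a TrainingSample and the page image, none of which are set up in this file.
#include "cubeclassifier.h"
// Hypothetical helper; setup of tess, sample and page_pix is assumed.
int RateSampleWithCube(tesseract::Tesseract *tess,
                       const tesseract::TrainingSample &sample, Pix *page_pix,
                       GenericVector<tesseract::UnicharRating> *ratings) {
  tesseract::CubeClassifier classifier(tess);
  // One UnicharRating per cube alternate whose label maps to a known unichar.
  return classifier.UnicharClassifySample(sample, page_pix, /*debug=*/0,
                                          INVALID_UNICHAR_ID, ratings);
}
CubeTessClassifier is used the same way, but it first runs the Tesseract class pruner and then rescores the surviving candidates with cube, as the implementation above shows.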

View File

@ -0,0 +1,81 @@
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
///////////////////////////////////////////////////////////////////////
// File: cubeclassifier.h
// Description: Cube implementation of a ShapeClassifier.
// Author: Ray Smith
// Created: Wed Nov 23 10:36:32 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
#define THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
#include "shapeclassifier.h"
#include "platform.h"
namespace tesseract {
class Classify;
class CubeRecoContext;
class ShapeTable;
class TessClassifier;
class Tesseract;
class TrainingSample;
struct UnicharRating;
// Cube implementation of a ShapeClassifier.
class TESS_API CubeClassifier : public ShapeClassifier {
public:
explicit CubeClassifier(Tesseract* tesseract);
virtual ~CubeClassifier();
// Classifies the given [training] sample, writing to results.
// See ShapeClassifier for a full description.
virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
int debug, UNICHAR_ID keep_this,
GenericVector<UnicharRating>* results);
// Provides access to the ShapeTable that this classifier works with.
virtual const ShapeTable* GetShapeTable() const;
private:
// Cube objects.
CubeRecoContext* cube_cntxt_;
const ShapeTable& shape_table_;
};
// Combination of Tesseract class pruner with scoring by cube.
class TESS_API CubeTessClassifier : public ShapeClassifier {
public:
explicit CubeTessClassifier(Tesseract* tesseract);
virtual ~CubeTessClassifier();
// Classifies the given [training] sample, writing to results.
// See ShapeClassifier for a full description.
virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
int debug, UNICHAR_ID keep_this,
GenericVector<UnicharRating>* results);
// Provides access to the ShapeTable that this classifier works with.
virtual const ShapeTable* GetShapeTable() const;
private:
// Cube objects.
CubeRecoContext* cube_cntxt_;
const ShapeTable& shape_table_;
TessClassifier* pruner_;
};
} // namespace tesseract
#endif /* THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_ */

File diff suppressed because it is too large

View File

@ -0,0 +1,35 @@
/******************************************************************
* File: docqual.h (Formerly docqual.h)
* Description: Document Quality Metrics
* Author: Phil Cheatle
* Created: Mon May 9 11:27:28 BST 1994
*
* (C) Copyright 1994, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef DOCQUAL_H
#define DOCQUAL_H
#include "control.h"
enum GARBAGE_LEVEL
{
G_NEVER_CRUNCH,
G_OK,
G_DODGY,
G_TERRIBLE
};
inT16 word_blob_quality(WERD_RES *word, ROW *row);
void reject_whole_page(PAGE_RES_IT &page_res_it);
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,278 @@
///////////////////////////////////////////////////////////////////////
// File: equationdetect.h
// Description: The equation detection class that inherits equationdetectbase.
// Author: Zongyi (Joe) Liu (joeliu@google.com)
// Created: Fri Aug 31 11:13:01 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_EQUATIONDETECT_H__
#define TESSERACT_CCMAIN_EQUATIONDETECT_H__
#include "blobbox.h"
#include "equationdetectbase.h"
#include "genericvector.h"
#include "tesseractclass.h"
#include "unichar.h"
class BLOBNBOX;
class BLOB_CHOICE;
class BLOB_CHOICE_LIST;
class TO_BLOCK_LIST;
class TBOX;
class UNICHARSET;
namespace tesseract {
class Tesseract;
class ColPartition;
class ColPartitionGrid;
class ColPartitionSet;
class EquationDetect : public EquationDetectBase {
public:
EquationDetect(const char* equ_datapath,
const char* equ_language);
~EquationDetect();
enum IndentType {
NO_INDENT,
LEFT_INDENT,
RIGHT_INDENT,
BOTH_INDENT,
INDENT_TYPE_COUNT
};
// Reset the lang_tesseract_ pointer. This function should be called before we
// do any detector work.
void SetLangTesseract(Tesseract* lang_tesseract);
// Iterate over the blobs inside to_block, and set the blobs that we want to
// process to BSTT_NONE. (By default, they should be BSTT_SKIP). The function
// returns 0 upon success.
int LabelSpecialText(TO_BLOCK* to_block);
// Find possible equation partitions from part_grid. Should be called
// after the special_text_type of blobs are set.
// It returns 0 upon success.
int FindEquationParts(ColPartitionGrid* part_grid,
ColPartitionSet** best_columns);
// Reset the resolution of the processing image. TEST only function.
void SetResolution(const int resolution);
protected:
// Identify the special text type for one blob, and update its field. When
// height_th is set (> 0), we will label the blob as BSTT_NONE if its height
// is less than height_th.
void IdentifySpecialText(BLOBNBOX *blob, const int height_th);
// Estimate the type for one unichar.
BlobSpecialTextType EstimateTypeForUnichar(
const UNICHARSET& unicharset, const UNICHAR_ID id) const;
// Compute special text type for each blob in part_grid_.
void IdentifySpecialText();
// Identify blobs that we want to skip during special blob type
// classification.
void IdentifyBlobsToSkip(ColPartition* part);
// The ColPartitions in part_grid_ may be over-segmented, particularly in the
// block equation regions, so we would like to identify these partitions and merge
// them before we do the searching.
void MergePartsByLocation();
// Starting from the seed center, we do a radius search, and for partitions that
// have large overlaps with seed, we remove them from part_grid_ and add into
// parts_overlap. Note: this function may update the part_grid_, so if the
// caller is also running ColPartitionGridSearch, use the RepositionIterator
// to continue.
void SearchByOverlap(ColPartition* seed,
GenericVector<ColPartition*>* parts_overlap);
// Insert part back into part_grid_, after it absorbs some other parts.
void InsertPartAfterAbsorb(ColPartition* part);
// Identify the colpartitions in part_grid_, label them as PT_EQUATION, and
// save them into cp_seeds_.
void IdentifySeedParts();
// Check the blobs count for a seed region candidate.
bool CheckSeedBlobsCount(ColPartition* part);
// Compute the foreground pixel density for a tbox area.
float ComputeForegroundDensity(const TBOX& tbox);
// Check if part qualifies for the seed2 label: low math density and left
// indented. We use two checks:
// 1. If its left is aligned with any coordinates in indented_texts_left,
// which we assume have been sorted.
// 2. If its foreground density is over foreground_density_th.
bool CheckForSeed2(
const GenericVector<int>& indented_texts_left,
const float foreground_density_th,
ColPartition* part);
// Count the number of values in sorted_vec that is close to val, used to
// check if a partition is aligned with text partitions.
int CountAlignment(
const GenericVector<int>& sorted_vec, const int val) const;
// Check for a seed candidate using the foreground pixel density. We return
// true if the density is below a certain threshold, because characters in
// equation regions are usually spaced apart with more white space.
bool CheckSeedFgDensity(const float density_th, ColPartition* part);
// A light version of SplitCPHor: instead of really doing the part split, we
// simply compute the union bounding box of each split part.
void SplitCPHorLite(ColPartition* part, GenericVector<TBOX>* splitted_boxes);
// Split the part (horizontally), and save the split result into
// parts_splitted. Note that it is the caller's responsibility to release the
// memory owned by parts_splitted. On the other hand, the part is unchanged
// during this process and still owns the blobs, so do NOT call DeleteBoxes
// when freeing the colpartitions in parts_splitted.
void SplitCPHor(ColPartition* part,
GenericVector<ColPartition*>* parts_splitted);
// Check the density for a seed candidate (part) using its math density and
// italic density, returns true if the check passed.
bool CheckSeedDensity(const float math_density_high,
const float math_density_low,
const ColPartition* part) const;
// Check if part is indented.
IndentType IsIndented(ColPartition* part);
// Identify inline partitions from cp_seeds_, and re-label them.
void IdentifyInlineParts();
// Compute the super bounding box for all colpartitions inside part_grid_.
void ComputeCPsSuperBBox();
// Identify inline partitions from cp_seeds_ using the horizontal search.
void IdentifyInlinePartsHorizontal();
// Estimate the line spacing between two text partitions. Returns -1 if not
// enough data.
int EstimateTextPartLineSpacing();
// Identify inline partitions from cp_seeds_ using vertical search.
void IdentifyInlinePartsVertical(const bool top_to_bottom,
const int textPartsLineSpacing);
// Check if part is an inline equation zone. This should be called after we
// identified the seed regions.
bool IsInline(const bool search_bottom,
const int textPartsLineSpacing,
ColPartition* part);
// For a given seed partition, we search the part_grid_ and see if there is
// any partition that can be merged with it. It returns true if the seed has been
// expanded.
bool ExpandSeed(ColPartition* seed);
// Starting from the seed position, we search the part_grid_
// horizontally/vertically, find all partitions that can be
// merged with seed, remove them from part_grid_, and put them into
// parts_to_merge.
void ExpandSeedHorizontal(const bool search_left,
ColPartition* seed,
GenericVector<ColPartition*>* parts_to_merge);
void ExpandSeedVertical(const bool search_bottom,
ColPartition* seed,
GenericVector<ColPartition*>* parts_to_merge);
// Check if a part_box is the small neighbor of seed_box.
bool IsNearSmallNeighbor(const TBOX& seed_box,
const TBOX& part_box) const;
// Perform the density check for part, which we assume is near a seed
// partition. It returns true if the check passed.
bool CheckSeedNeighborDensity(const ColPartition* part) const;
// After identify the math blocks, we do one more scanning on all text
// partitions, and check if any of them is a satellite of a math block:
// here p is a satellite of q if:
// 1. q is the nearest vertical neighbor of p, and
// 2. y_gap(p, q) is less than a threshold, and
// 3. x_overlap(p, q) is over a threshold.
// Note that p can be a satellite of two blocks: its top neighbor and
// bottom neighbor.
void ProcessMathBlockSatelliteParts();
// Check if part is the satellite of one/two math blocks. If it is, we return
// true, and save the blocks into math_blocks.
bool IsMathBlockSatellite(
ColPartition* part, GenericVector<ColPartition*>* math_blocks);
// Search for the nearest neighbor of part in one vertical direction, as
// defined by search_bottom. It returns the neighbor found that has major x
// overlap with part, or NULL when none is found.
ColPartition* SearchNNVertical(const bool search_bottom,
const ColPartition* part);
// Check if the neighbor at vertical distance y_gap is a nearby math block
// partition.
bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const;
// Generate the tiff file name for output/debug file.
void GetOutputTiffName(const char* name, STRING* image_name) const;
// Debugger function that renders ColPartitions on the input image, where:
// parts labeled as PT_EQUATION will be painted in red, PT_INLINE_EQUATION
// will be painted in green, and other parts will be painted in blue.
void PaintColParts(const STRING& outfile) const;
// Debugger function that renders the blobs in part_grid_ over the input
// image.
void PaintSpecialTexts(const STRING& outfile) const;
// Debugger function that print the math blobs density values for a
// ColPartition object.
void PrintSpecialBlobsDensity(const ColPartition* part) const;
// The tesseract engine initialized from equation training data.
Tesseract equ_tesseract_;
// The tesseract engine used for OCR. This pointer is passed in by the caller,
// so do NOT destroy it in this class.
Tesseract* lang_tesseract_;
// The ColPartitionGrid that we are processing. This pointer is passed in from
// the caller, so do NOT destroy it in the class.
ColPartitionGrid* part_grid_;
// A simple array of pointers to the best assigned column division at
// each grid y coordinate. This pointer is passed in from the caller, so do
// NOT destroy it in the class.
ColPartitionSet** best_columns_;
// The super bounding box of all cps in the part_grid_.
TBOX* cps_super_bbox_;
// The seed ColPartition for equation region.
GenericVector<ColPartition*> cp_seeds_;
// The resolution (dpi) of the processing image.
int resolution_;
// The number of pages we have processed.
int page_count_;
};
} // namespace tesseract
#endif  // TESSERACT_CCMAIN_EQUATIONDETECT_H__
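A driver sketch for the public interface above; equ_datapath, equ_lang, lang_tess, to_block, part_grid and best_columns all come from the caller and the layout-analysis stage, and are not defined in this header.
#include "equationdetect.h"
// Hypothetical driver; every input is assumed to exist already.
bool DetectEquations(const char *equ_datapath, const char *equ_lang,
                     tesseract::Tesseract *lang_tess, TO_BLOCK *to_block,
                     tesseract::ColPartitionGrid *part_grid,
                     tesseract::ColPartitionSet **best_columns) {
  tesseract::EquationDetect detector(equ_datapath, equ_lang);
  detector.SetLangTesseract(lang_tess);  // must precede any detection work
  // Both calls return 0 on success; equation partitions in part_grid are then
  // labeled as (inline) equation regions (see PaintColParts for the debug view).
  return detector.LabelSpecialText(to_block) == 0 &&
         detector.FindEquationParts(part_grid, best_columns) == 0;
}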

View File

@ -0,0 +1,876 @@
/******************************************************************
* File: fixspace.cpp (Formerly fixspace.c)
* Description: Implements a pass over the page res, exploring the alternative
* spacing possibilities, trying to use context to improve the
* word spacing
* Author: Phil Cheatle
* Created: Thu Oct 21 11:38:43 BST 1993
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <ctype.h>
#include "reject.h"
#include "statistc.h"
#include "control.h"
#include "fixspace.h"
#include "genblob.h"
#include "tessvars.h"
#include "tessbox.h"
#include "globals.h"
#include "tesseractclass.h"
#define PERFECT_WERDS 999
#define MAXSPACING 128 /*max expected spacing in pix */
namespace tesseract {
/**
* @name fix_fuzzy_spaces()
* Walk over the page finding sequences of words joined by fuzzy spaces. Extract
* them as a sublist, process the sublist to find the optimal arrangement of
* spaces then replace the sublist in the ROW_RES.
*
* @param monitor progress monitor
* @param word_count count of words in doc
* @param[out] page_res
*/
void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor,
inT32 word_count,
PAGE_RES *page_res) {
BLOCK_RES_IT block_res_it;
ROW_RES_IT row_res_it;
WERD_RES_IT word_res_it_from;
WERD_RES_IT word_res_it_to;
WERD_RES *word_res;
WERD_RES_LIST fuzzy_space_words;
inT16 new_length;
BOOL8 prevent_null_wd_fixsp; // DON'T process blobless wds
inT32 word_index; // current word
block_res_it.set_to_list(&page_res->block_res_list);
word_index = 0;
for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list();
block_res_it.forward()) {
row_res_it.set_to_list(&block_res_it.data()->row_res_list);
for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
row_res_it.forward()) {
word_res_it_from.set_to_list(&row_res_it.data()->word_res_list);
while (!word_res_it_from.at_last()) {
word_res = word_res_it_from.data();
while (!word_res_it_from.at_last() &&
!(word_res->combination ||
word_res_it_from.data_relative(1)->word->flag(W_FUZZY_NON) ||
word_res_it_from.data_relative(1)->word->flag(W_FUZZY_SP))) {
fix_sp_fp_word(word_res_it_from, row_res_it.data()->row,
block_res_it.data()->block);
word_res = word_res_it_from.forward();
word_index++;
if (monitor != NULL) {
monitor->ocr_alive = TRUE;
monitor->progress = 90 + 5 * word_index / word_count;
if (monitor->deadline_exceeded() ||
(monitor->cancel != NULL &&
(*monitor->cancel)(monitor->cancel_this, stats_.dict_words)))
return;
}
}
if (!word_res_it_from.at_last()) {
word_res_it_to = word_res_it_from;
prevent_null_wd_fixsp =
word_res->word->cblob_list()->empty();
if (check_debug_pt(word_res, 60))
debug_fix_space_level.set_value(10);
word_res_it_to.forward();
word_index++;
if (monitor != NULL) {
monitor->ocr_alive = TRUE;
monitor->progress = 90 + 5 * word_index / word_count;
if (monitor->deadline_exceeded() ||
(monitor->cancel != NULL &&
(*monitor->cancel)(monitor->cancel_this, stats_.dict_words)))
return;
}
while (!word_res_it_to.at_last() &&
(word_res_it_to.data_relative(1)->word->flag(W_FUZZY_NON) ||
word_res_it_to.data_relative(1)->word->flag(W_FUZZY_SP))) {
if (check_debug_pt(word_res, 60))
debug_fix_space_level.set_value(10);
if (word_res->word->cblob_list()->empty())
prevent_null_wd_fixsp = TRUE;
word_res = word_res_it_to.forward();
}
if (check_debug_pt(word_res, 60))
debug_fix_space_level.set_value(10);
if (word_res->word->cblob_list()->empty())
prevent_null_wd_fixsp = TRUE;
if (prevent_null_wd_fixsp) {
word_res_it_from = word_res_it_to;
}
else {
fuzzy_space_words.assign_to_sublist(&word_res_it_from,
&word_res_it_to);
fix_fuzzy_space_list(fuzzy_space_words,
row_res_it.data()->row,
block_res_it.data()->block);
new_length = fuzzy_space_words.length();
word_res_it_from.add_list_before(&fuzzy_space_words);
for (;
!word_res_it_from.at_last() && new_length > 0;
new_length--) {
word_res_it_from.forward();
}
}
if (test_pt)
debug_fix_space_level.set_value(0);
}
fix_sp_fp_word(word_res_it_from, row_res_it.data()->row,
block_res_it.data()->block);
// Last word in row
}
}
}
}
void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm,
ROW *row,
BLOCK* block) {
inT16 best_score;
WERD_RES_LIST current_perm;
inT16 current_score;
BOOL8 improved = FALSE;
best_score = eval_word_spacing(best_perm); // default score
dump_words(best_perm, best_score, 1, improved);
if (best_score != PERFECT_WERDS)
initialise_search(best_perm, current_perm);
while ((best_score != PERFECT_WERDS) && !current_perm.empty()) {
match_current_words(current_perm, row, block);
current_score = eval_word_spacing(current_perm);
dump_words(current_perm, current_score, 2, improved);
if (current_score > best_score) {
best_perm.clear();
best_perm.deep_copy(&current_perm, &WERD_RES::deep_copy);
best_score = current_score;
improved = TRUE;
}
if (current_score < PERFECT_WERDS)
transform_to_next_perm(current_perm);
}
dump_words(best_perm, best_score, 3, improved);
}
} // namespace tesseract
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) {
WERD_RES_IT src_it(&src_list);
WERD_RES_IT new_it(&new_list);
WERD_RES *src_wd;
WERD_RES *new_wd;
for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
src_wd = src_it.data();
if (!src_wd->combination) {
new_wd = WERD_RES::deep_copy(src_wd);
new_wd->combination = FALSE;
new_wd->part_of_combo = FALSE;
new_it.add_after_then_move(new_wd);
}
}
}
namespace tesseract {
void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
BLOCK* block) {
WERD_RES_IT word_it(&words);
WERD_RES *word;
// Since we are not using PAGE_RES to iterate over words, we need to update
// prev_word_best_choice_ before calling classify_word_pass2().
prev_word_best_choice_ = NULL;
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
word = word_it.data();
if ((!word->part_of_combo) && (word->box_word == NULL)) {
WordData word_data(block, row, word);
SetupWordPassN(2, &word_data);
classify_word_and_language(2, NULL, &word_data);
}
prev_word_best_choice_ = word->best_choice;
}
}
/**
* @name eval_word_spacing()
* The basic measure is the number of characters in contextually confirmed
 * words (i.e. the word is done).
* If all words are contextually confirmed the evaluation is deemed perfect.
*
* Some fiddles are done to handle "1"s as these are VERY frequent causes of
* fuzzy spaces. The problem with the basic measure is that "561 63" would score
* the same as "56163", though given our knowledge that the space is fuzzy, and
* that there is a "1" next to the fuzzy space, we need to ensure that "56163"
* is preferred.
*
* The solution is to NOT COUNT the score of any word which has a digit at one
 * end and a "1Il" as the character on the other side of the space.
*
 * Conversely, any character next to a "1" within a word is counted as a positive
* score. Thus "561 63" would score 4 (3 chars in a numeric word plus 1 side of
* the "1" joined). "56163" would score 7 - all chars in a numeric word + 2
* sides of a "1" joined.
*
* The joined 1 rule is applied to any word REGARDLESS of contextual
 * confirmation. Thus "PS7a71 3/7a" scores 1 (neither word is contextually
 * confirmed; the only score is from the joined 1). "PS7a713/7a" scores 2.
*
*/
inT16 Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
WERD_RES_IT word_res_it(&word_res_list);
inT16 total_score = 0;
inT16 word_count = 0;
inT16 done_word_count = 0;
inT16 word_len;
inT16 i;
inT16 offset;
WERD_RES *word; // current word
inT16 prev_word_score = 0;
BOOL8 prev_word_done = FALSE;
BOOL8 prev_char_1 = FALSE; // prev ch a "1/I/l"?
BOOL8 prev_char_digit = FALSE; // prev ch 2..9 or 0
BOOL8 current_char_1 = FALSE;
BOOL8 current_word_ok_so_far;
STRING punct_chars = "!\"`',.:;";
BOOL8 prev_char_punct = FALSE;
BOOL8 current_char_punct = FALSE;
BOOL8 word_done = FALSE;
do {
word = word_res_it.data();
word_done = fixspace_thinks_word_done(word);
word_count++;
if (word->tess_failed) {
total_score += prev_word_score;
if (prev_word_done)
done_word_count++;
prev_word_score = 0;
prev_char_1 = FALSE;
prev_char_digit = FALSE;
prev_word_done = FALSE;
}
else {
/*
Can we add the prev word score and potentially count this word?
Yes IF it didn't end in a 1 when the first char of this word is a digit
AND it didn't end in a digit when the first char of this word is a 1
*/
word_len = word->reject_map.length();
current_word_ok_so_far = FALSE;
if (!((prev_char_1 && digit_or_numeric_punct(word, 0)) ||
(prev_char_digit && (
(word_done &&
word->best_choice->unichar_lengths().string()[0] == 1 &&
word->best_choice->unichar_string()[0] == '1') ||
(!word_done && STRING(conflict_set_I_l_1).contains(
word->best_choice->unichar_string()[0])))))) {
total_score += prev_word_score;
if (prev_word_done)
done_word_count++;
current_word_ok_so_far = word_done;
}
if (current_word_ok_so_far) {
prev_word_done = TRUE;
prev_word_score = word_len;
}
else {
prev_word_done = FALSE;
prev_word_score = 0;
}
/* Add 1 to total score for every joined 1 regardless of context and
rejection */
for (i = 0, prev_char_1 = FALSE; i < word_len; i++) {
current_char_1 = word->best_choice->unichar_string()[i] == '1';
if (prev_char_1 || (current_char_1 && (i > 0)))
total_score++;
prev_char_1 = current_char_1;
}
/* Add 1 to total score for every joined punctuation regardless of context
and rejection */
if (tessedit_prefer_joined_punct) {
for (i = 0, offset = 0, prev_char_punct = FALSE; i < word_len;
offset += word->best_choice->unichar_lengths()[i++]) {
current_char_punct =
punct_chars.contains(word->best_choice->unichar_string()[offset]);
if (prev_char_punct || (current_char_punct && i > 0))
total_score++;
prev_char_punct = current_char_punct;
}
}
prev_char_digit = digit_or_numeric_punct(word, word_len - 1);
for (i = 0, offset = 0; i < word_len - 1;
offset += word->best_choice->unichar_lengths()[i++]);
prev_char_1 =
((word_done && (word->best_choice->unichar_string()[offset] == '1'))
|| (!word_done && STRING(conflict_set_I_l_1).contains(
word->best_choice->unichar_string()[offset])));
}
/* Find next word */
do {
word_res_it.forward();
} while (word_res_it.data()->part_of_combo);
} while (!word_res_it.at_first());
total_score += prev_word_score;
if (prev_word_done)
done_word_count++;
if (done_word_count == word_count)
return PERFECT_WERDS;
else
return total_score;
}
BOOL8 Tesseract::digit_or_numeric_punct(WERD_RES *word, int char_position) {
int i;
int offset;
for (i = 0, offset = 0; i < char_position;
offset += word->best_choice->unichar_lengths()[i++]);
return (
word->uch_set->get_isdigit(
word->best_choice->unichar_string().string() + offset,
word->best_choice->unichar_lengths()[i]) ||
(word->best_choice->permuter() == NUMBER_PERM &&
STRING(numeric_punctuation).contains(
word->best_choice->unichar_string().string()[offset])));
}
} // namespace tesseract
/**
* @name transform_to_next_perm()
* Examines the current word list to find the smallest word gap size. Then walks
* the word list closing any gaps of this size by either inserted new
* combination words, or extending existing ones.
*
* The routine COULD be limited to stop it building words longer than N blobs.
*
* If there are no more gaps then it DELETES the entire list and returns the
* empty list to cause termination.
*/
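// Illustrative walk-through (not from the original source): given the word
// list [A] <gap 3> [B] <gap 7> [C], the first scan finds min_gap = 3, so the
// second scan joins A and B into a single combination word and leaves C alone.
// The next call sees only the gap of 7 and joins the combination with C, and
// the call after that finds no remaining gaps and clears the list to stop.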
void transform_to_next_perm(WERD_RES_LIST &words) {
WERD_RES_IT word_it(&words);
WERD_RES_IT prev_word_it(&words);
WERD_RES *word;
WERD_RES *prev_word;
WERD_RES *combo;
WERD *copy_word;
inT16 prev_right = -MAX_INT16;
TBOX box;
inT16 gap;
inT16 min_gap = MAX_INT16;
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
word = word_it.data();
if (!word->part_of_combo) {
box = word->word->bounding_box();
if (prev_right > -MAX_INT16) {
gap = box.left() - prev_right;
if (gap < min_gap)
min_gap = gap;
}
prev_right = box.right();
}
}
if (min_gap < MAX_INT16) {
prev_right = -MAX_INT16; // back to start
word_it.set_to_list(&words);
// Note: we can't use cycle_pt due to inserted combos at start of list.
for (; (prev_right == -MAX_INT16) || !word_it.at_first();
word_it.forward()) {
word = word_it.data();
if (!word->part_of_combo) {
box = word->word->bounding_box();
if (prev_right > -MAX_INT16) {
gap = box.left() - prev_right;
if (gap <= min_gap) {
prev_word = prev_word_it.data();
if (prev_word->combination) {
combo = prev_word;
}
else {
/* Make a new combination and insert before
* the first word being joined. */
copy_word = new WERD;
*copy_word = *(prev_word->word);
// deep copy
combo = new WERD_RES(copy_word);
combo->combination = TRUE;
combo->x_height = prev_word->x_height;
prev_word->part_of_combo = TRUE;
prev_word_it.add_before_then_move(combo);
}
combo->word->set_flag(W_EOL, word->word->flag(W_EOL));
if (word->combination) {
combo->word->join_on(word->word);
// Move blobs to combo
// old combo no longer needed
delete word_it.extract();
}
else {
// Copy current wd to combo
combo->copy_on(word);
word->part_of_combo = TRUE;
}
combo->done = FALSE;
combo->ClearResults();
}
else {
prev_word_it = word_it; // catch up
}
}
prev_right = box.right();
}
}
}
else {
words.clear(); // signal termination
}
}
namespace tesseract {
void Tesseract::dump_words(WERD_RES_LIST &perm, inT16 score,
inT16 mode, BOOL8 improved) {
WERD_RES_IT word_res_it(&perm);
if (debug_fix_space_level > 0) {
if (mode == 1) {
stats_.dump_words_str = "";
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
word_res_it.forward()) {
if (!word_res_it.data()->part_of_combo) {
stats_.dump_words_str +=
word_res_it.data()->best_choice->unichar_string();
stats_.dump_words_str += ' ';
}
}
}
if (debug_fix_space_level > 1) {
switch (mode) {
case 1:
tprintf("EXTRACTED (%d): \"", score);
break;
case 2:
tprintf("TESTED (%d): \"", score);
break;
case 3:
tprintf("RETURNED (%d): \"", score);
break;
}
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
word_res_it.forward()) {
if (!word_res_it.data()->part_of_combo) {
tprintf("%s/%1d ",
word_res_it.data()->best_choice->unichar_string().string(),
(int)word_res_it.data()->best_choice->permuter());
}
}
tprintf("\"\n");
}
else if (improved) {
tprintf("FIX SPACING \"%s\" => \"", stats_.dump_words_str.string());
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
word_res_it.forward()) {
if (!word_res_it.data()->part_of_combo) {
tprintf("%s/%1d ",
word_res_it.data()->best_choice->unichar_string().string(),
(int)word_res_it.data()->best_choice->permuter());
}
}
tprintf("\"\n");
}
}
}
BOOL8 Tesseract::fixspace_thinks_word_done(WERD_RES *word) {
if (word->done)
return TRUE;
/*
Use all the standard pass 2 conditions for mode 5 in set_done() in
reject.c BUT DON'T REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DON'T
CARE WHETHER WE HAVE of/at on/an etc.
*/
if (fixsp_done_mode > 0 &&
(word->tess_accepted ||
(fixsp_done_mode == 2 && word->reject_map.reject_count() == 0) ||
fixsp_done_mode == 3) &&
(strchr(word->best_choice->unichar_string().string(), ' ') == NULL) &&
((word->best_choice->permuter() == SYSTEM_DAWG_PERM) ||
(word->best_choice->permuter() == FREQ_DAWG_PERM) ||
(word->best_choice->permuter() == USER_DAWG_PERM) ||
(word->best_choice->permuter() == NUMBER_PERM))) {
return TRUE;
}
else {
return FALSE;
}
}
/**
* @name fix_sp_fp_word()
* Test the current word to see if it can be split by deleting noise blobs. If
* so, do the business.
* Return with the iterator pointing to the same place if the word is unchanged,
* or the last of the replacement words.
*/
void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row,
BLOCK* block) {
WERD_RES *word_res;
WERD_RES_LIST sub_word_list;
WERD_RES_IT sub_word_list_it(&sub_word_list);
inT16 blob_index;
inT16 new_length;
float junk;
word_res = word_res_it.data();
if (word_res->word->flag(W_REP_CHAR) ||
word_res->combination ||
word_res->part_of_combo ||
!word_res->word->flag(W_DONT_CHOP))
return;
blob_index = worst_noise_blob(word_res, &junk);
if (blob_index < 0)
return;
if (debug_fix_space_level > 1) {
tprintf("FP fixspace working on \"%s\"\n",
word_res->best_choice->unichar_string().string());
}
word_res->word->rej_cblob_list()->sort(c_blob_comparator);
sub_word_list_it.add_after_stay_put(word_res_it.extract());
fix_noisy_space_list(sub_word_list, row, block);
new_length = sub_word_list.length();
word_res_it.add_list_before(&sub_word_list);
for (; !word_res_it.at_last() && new_length > 1; new_length--) {
word_res_it.forward();
}
}
void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row,
BLOCK* block) {
inT16 best_score;
WERD_RES_IT best_perm_it(&best_perm);
WERD_RES_LIST current_perm;
WERD_RES_IT current_perm_it(&current_perm);
WERD_RES *old_word_res;
inT16 current_score;
BOOL8 improved = FALSE;
best_score = fp_eval_word_spacing(best_perm); // default score
dump_words(best_perm, best_score, 1, improved);
old_word_res = best_perm_it.data();
// Even deep_copy doesn't copy the underlying WERD unless its combination
// flag is true!
old_word_res->combination = TRUE; // Kludge to force deep copy
current_perm_it.add_to_end(WERD_RES::deep_copy(old_word_res));
old_word_res->combination = FALSE; // Undo kludge
break_noisiest_blob_word(current_perm);
while (best_score != PERFECT_WERDS && !current_perm.empty()) {
match_current_words(current_perm, row, block);
current_score = fp_eval_word_spacing(current_perm);
dump_words(current_perm, current_score, 2, improved);
if (current_score > best_score) {
best_perm.clear();
best_perm.deep_copy(&current_perm, &WERD_RES::deep_copy);
best_score = current_score;
improved = TRUE;
}
if (current_score < PERFECT_WERDS) {
break_noisiest_blob_word(current_perm);
}
}
dump_words(best_perm, best_score, 3, improved);
}
/**
* break_noisiest_blob_word()
* Find the word with the blob which looks like the worst noise.
* Break the word into two, deleting the noise blob.
*/
void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
WERD_RES_IT word_it(&words);
WERD_RES_IT worst_word_it;
float worst_noise_score = 9999;
int worst_blob_index = -1; // Noisiest blob of noisiest wd
int blob_index; // of wds noisiest blob
float noise_score; // of wds noisiest blob
WERD_RES *word_res;
C_BLOB_IT blob_it;
C_BLOB_IT rej_cblob_it;
C_BLOB_LIST new_blob_list;
C_BLOB_IT new_blob_it;
C_BLOB_IT new_rej_cblob_it;
WERD *new_word;
inT16 start_of_noise_blob;
inT16 i;
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
blob_index = worst_noise_blob(word_it.data(), &noise_score);
if (blob_index > -1 && worst_noise_score > noise_score) {
worst_noise_score = noise_score;
worst_blob_index = blob_index;
worst_word_it = word_it;
}
}
if (worst_blob_index < 0) {
words.clear(); // signal termination
return;
}
/* Now split the worst_word_it */
word_res = worst_word_it.data();
/* Move blobs before noise blob to a new bloblist */
new_blob_it.set_to_list(&new_blob_list);
blob_it.set_to_list(word_res->word->cblob_list());
for (i = 0; i < worst_blob_index; i++, blob_it.forward()) {
new_blob_it.add_after_then_move(blob_it.extract());
}
start_of_noise_blob = blob_it.data()->bounding_box().left();
delete blob_it.extract(); // throw out noise blob
new_word = new WERD(&new_blob_list, word_res->word);
new_word->set_flag(W_EOL, FALSE);
word_res->word->set_flag(W_BOL, FALSE);
word_res->word->set_blanks(1); // After break
new_rej_cblob_it.set_to_list(new_word->rej_cblob_list());
rej_cblob_it.set_to_list(word_res->word->rej_cblob_list());
for (;
(!rej_cblob_it.empty() &&
(rej_cblob_it.data()->bounding_box().left() < start_of_noise_blob));
rej_cblob_it.forward()) {
new_rej_cblob_it.add_after_then_move(rej_cblob_it.extract());
}
WERD_RES* new_word_res = new WERD_RES(new_word);
new_word_res->combination = TRUE;
worst_word_it.add_before_then_move(new_word_res);
word_res->ClearResults();
}
inT16 Tesseract::worst_noise_blob(WERD_RES *word_res,
float *worst_noise_score) {
float noise_score[512];
int i;
int min_noise_blob; // 1st contender
int max_noise_blob; // last contender
int non_noise_count;
int worst_noise_blob; // Worst blob
float small_limit = kBlnXHeight * fixsp_small_outlines_size;
float non_noise_limit = kBlnXHeight * 0.8;
if (word_res->rebuild_word == NULL)
return -1; // Can't handle cube words.
// Normalised.
int blob_count = word_res->box_word->length();
ASSERT_HOST(blob_count <= 512);
if (blob_count < 5)
return -1; // too short to split
/* Get the noise scores for all blobs */
#ifndef SECURE_NAMES
if (debug_fix_space_level > 5)
tprintf("FP fixspace Noise metrics for \"%s\": ",
word_res->best_choice->unichar_string().string());
#endif
for (i = 0; i < blob_count && i < word_res->rebuild_word->NumBlobs(); i++) {
TBLOB* blob = word_res->rebuild_word->blobs[i];
if (word_res->reject_map[i].accepted())
noise_score[i] = non_noise_limit;
else
noise_score[i] = blob_noise_score(blob);
if (debug_fix_space_level > 5)
tprintf("%1.1f ", noise_score[i]);
}
if (debug_fix_space_level > 5)
tprintf("\n");
/* Now find the worst one which is far enough away from the end of the word */
non_noise_count = 0;
for (i = 0; i < blob_count && non_noise_count < fixsp_non_noise_limit; i++) {
if (noise_score[i] >= non_noise_limit) {
non_noise_count++;
}
}
if (non_noise_count < fixsp_non_noise_limit)
return -1;
min_noise_blob = i;
non_noise_count = 0;
for (i = blob_count - 1; i >= 0 && non_noise_count < fixsp_non_noise_limit;
i--) {
if (noise_score[i] >= non_noise_limit) {
non_noise_count++;
}
}
if (non_noise_count < fixsp_non_noise_limit)
return -1;
max_noise_blob = i;
if (min_noise_blob > max_noise_blob)
return -1;
*worst_noise_score = small_limit;
worst_noise_blob = -1;
for (i = min_noise_blob; i <= max_noise_blob; i++) {
if (noise_score[i] < *worst_noise_score) {
worst_noise_blob = i;
*worst_noise_score = noise_score[i];
}
}
return worst_noise_blob;
}
float Tesseract::blob_noise_score(TBLOB *blob) {
TBOX box; // BB of outline
inT16 outline_count = 0;
inT16 max_dimension;
inT16 largest_outline_dimension = 0;
for (TESSLINE* ol = blob->outlines; ol != NULL; ol = ol->next) {
outline_count++;
box = ol->bounding_box();
if (box.height() > box.width()) {
max_dimension = box.height();
}
else {
max_dimension = box.width();
}
if (largest_outline_dimension < max_dimension)
largest_outline_dimension = max_dimension;
}
if (outline_count > 5) {
// penalise LOTS of blobs
largest_outline_dimension *= 2;
}
box = blob->bounding_box();
if (box.bottom() > kBlnBaselineOffset * 4 ||
box.top() < kBlnBaselineOffset / 2) {
// Be lax if the blob is high or low
largest_outline_dimension /= 2;
}
return largest_outline_dimension;
}
} // namespace tesseract
void fixspace_dbg(WERD_RES *word) {
TBOX box = word->word->bounding_box();
BOOL8 show_map_detail = FALSE;
inT16 i;
box.print();
tprintf(" \"%s\" ", word->best_choice->unichar_string().string());
tprintf("Blob count: %d (word); %d/%d (rebuild word)\n",
word->word->cblob_list()->length(),
word->rebuild_word->NumBlobs(),
word->box_word->length());
word->reject_map.print(debug_fp);
tprintf("\n");
if (show_map_detail) {
tprintf("\"%s\"\n", word->best_choice->unichar_string().string());
for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
word->reject_map[i].full_print(debug_fp);
}
}
tprintf("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");
tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");
}
/**
* fp_eval_word_spacing()
* Evaluation function for fixed pitch word lists.
*
* Basically, count the number of "nice" characters - those which are in tess
* acceptable words or in dict words and are not rejected.
* Penalise any potential noise chars
*/
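// Illustrative example (not from the original source): a five-blob word that
// is accepted as a dictionary word gains +1 for each accepted, non-noise blob,
// while a blob that is a space or scores below the small-outline noise limit
// costs -1, so with one noisy blob the word contributes 4 - 1 = 3 rather than 5.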
namespace tesseract {
inT16 Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
WERD_RES_IT word_it(&word_res_list);
WERD_RES *word;
inT16 score = 0;
inT16 i;
float small_limit = kBlnXHeight * fixsp_small_outlines_size;
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
word = word_it.data();
if (word->rebuild_word == NULL)
continue; // Can't handle cube words.
if (word->done ||
word->tess_accepted ||
word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
word->best_choice->permuter() == FREQ_DAWG_PERM ||
word->best_choice->permuter() == USER_DAWG_PERM ||
safe_dict_word(word) > 0) {
int num_blobs = word->rebuild_word->NumBlobs();
UNICHAR_ID space = word->uch_set->unichar_to_id(" ");
for (i = 0; i < word->best_choice->length() && i < num_blobs; ++i) {
TBLOB* blob = word->rebuild_word->blobs[i];
if (word->best_choice->unichar_id(i) == space ||
blob_noise_score(blob) < small_limit) {
score -= 1; // penalise possibly erroneous non-space
}
else if (word->reject_map[i].accepted()) {
score++;
}
}
}
}
if (score < 0)
score = 0;
return score;
}
} // namespace tesseract

View File

@ -0,0 +1,31 @@
/******************************************************************
* File: fixspace.h (Formerly fixspace.h)
* Description: Implements a pass over the page res, exploring the alternative
* spacing possibilities, trying to use context to improve the
 *              word spacing
* Author: Phil Cheatle
* Created: Thu Oct 21 11:38:43 BST 1993
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef FIXSPACE_H
#define FIXSPACE_H
#include "pageres.h"
#include "params.h"
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list);
void transform_to_next_perm(WERD_RES_LIST &words);
void fixspace_dbg(WERD_RES *word);
#endif

View File

@ -0,0 +1,216 @@
/**********************************************************************
* File: fixxht.cpp (Formerly fixxht.c)
* Description: Improve x_ht and look out for case inconsistencies
* Author: Phil Cheatle
* Created: Thu Aug 5 14:11:08 BST 1993
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <string.h>
#include <ctype.h>
#include "params.h"
#include "float2int.h"
#include "tesseractclass.h"
namespace tesseract {
// Fixxht overview.
// Premise: Initial estimate of x-height is adequate most of the time, but
// occasionally it is incorrect. Most notable causes of failure are:
// 1. Small caps, where the top of the caps is the same as the body text
// xheight. For small caps words the xheight needs to be reduced to correctly
// recognize the caps in the small caps word.
// 2. All xheight lines, such as summer. Here the initial estimate will have
// guessed that the blob tops are caps and will have placed the xheight too low.
// 3. Noise/logos beside words, or changes in font size on a line. Such
// things can blow the statistics and cause an incorrect estimate.
// 4. Incorrect baseline. Can happen when 2 columns are incorrectly merged.
// In this case the x-height is often still correct.
//
// Algorithm.
// Compare the vertical position (top only) of alphanumerics in a word with
// the range of positions in training data (in the unicharset).
// See CountMisfitTops. If any characters disagree sufficiently with the
// initial xheight estimate, then recalculate the xheight, re-run OCR on
// the word, and if the number of vertical misfits goes down, along with
// either the word rating or certainty, then keep the new xheight.
// The new xheight is calculated as follows (see ComputeCompatibleXheight):
// For each alphanumeric character that has a vertically misplaced top
// (a misfit), yet its bottom is within the acceptable range (ie it is not
// likely a sub- or super-script) calculate the range of acceptable xheight
// positions from its range of tops, and give each value in the range a
// number of votes equal to the distance of its top from its acceptance range.
// The x-height position with the median of the votes becomes the new
// x-height. This assumes that most characters will be correctly recognized
// even if the x-height is incorrect. This is not a terrible assumption, but
// it is not great. An improvement would be to use a classifier that does
// not care about vertical position or scaling at all.
// Separately collect stats on shifted baselines and apply the same logic to
// computing a best-fit shift to fix the error. If the baseline needs to be
// shifted, but the x-height is OK, returns the original x-height along with
// the baseline shift to indicate that recognition needs to re-run.
// If the max-min top of a unicharset char is bigger than kMaxCharTopRange
// then the char top cannot be used to judge misfits or suggest a new top.
const int kMaxCharTopRange = 48;
// Returns the number of misfit blob tops in this word.
int Tesseract::CountMisfitTops(WERD_RES *word_res) {
int bad_blobs = 0;
int num_blobs = word_res->rebuild_word->NumBlobs();
for (int blob_id = 0; blob_id < num_blobs; ++blob_id) {
TBLOB* blob = word_res->rebuild_word->blobs[blob_id];
UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id);
if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) {
int top = blob->bounding_box().top();
if (top >= INT_FEAT_RANGE)
top = INT_FEAT_RANGE - 1;
int min_bottom, max_bottom, min_top, max_top;
unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom,
&min_top, &max_top);
if (max_top - min_top > kMaxCharTopRange)
continue;
bool bad = top < min_top - x_ht_acceptance_tolerance ||
top > max_top + x_ht_acceptance_tolerance;
if (bad)
++bad_blobs;
if (debug_x_ht_level >= 1) {
tprintf("Class %s is %s with top %d vs limits of %d->%d, +/-%d\n",
unicharset.id_to_unichar(class_id),
bad ? "Misfit" : "OK", top, min_top, max_top,
static_cast<int>(x_ht_acceptance_tolerance));
}
}
}
return bad_blobs;
}
// Returns a new x-height maximally compatible with the result in word_res.
// See comment above for overall algorithm.
float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res,
float* baseline_shift) {
STATS top_stats(0, MAX_UINT8);
STATS shift_stats(-MAX_UINT8, MAX_UINT8);
int bottom_shift = 0;
int num_blobs = word_res->rebuild_word->NumBlobs();
do {
top_stats.clear();
shift_stats.clear();
for (int blob_id = 0; blob_id < num_blobs; ++blob_id) {
TBLOB* blob = word_res->rebuild_word->blobs[blob_id];
UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id);
if (unicharset.get_isalpha(class_id) ||
unicharset.get_isdigit(class_id)) {
int top = blob->bounding_box().top() + bottom_shift;
// Clip the top to the limit of normalized feature space.
if (top >= INT_FEAT_RANGE)
top = INT_FEAT_RANGE - 1;
int bottom = blob->bounding_box().bottom() + bottom_shift;
int min_bottom, max_bottom, min_top, max_top;
unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom,
&min_top, &max_top);
// Chars with a wild top range would mess up the result so ignore them.
if (max_top - min_top > kMaxCharTopRange)
continue;
int misfit_dist = MAX((min_top - x_ht_acceptance_tolerance) - top,
top - (max_top + x_ht_acceptance_tolerance));
int height = top - kBlnBaselineOffset;
if (debug_x_ht_level >= 2) {
tprintf("Class %s: height=%d, bottom=%d,%d top=%d,%d, actual=%d,%d: ",
unicharset.id_to_unichar(class_id),
height, min_bottom, max_bottom, min_top, max_top,
bottom, top);
}
// Use only chars that fit in the expected bottom range, and where
// the range of tops is sensibly near the xheight.
if (min_bottom <= bottom + x_ht_acceptance_tolerance &&
bottom - x_ht_acceptance_tolerance <= max_bottom &&
min_top > kBlnBaselineOffset &&
max_top - kBlnBaselineOffset >= kBlnXHeight &&
misfit_dist > 0) {
// Compute the x-height position using proportionality between the
// actual height and expected height.
int min_xht = DivRounded(height * kBlnXHeight,
max_top - kBlnBaselineOffset);
int max_xht = DivRounded(height * kBlnXHeight,
min_top - kBlnBaselineOffset);
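// Worked example (assuming the usual BLN constants kBlnXHeight = 128 and
// kBlnBaselineOffset = 64): a blob whose top sits 116 above the baseline,
// while its expected top range above the baseline is 124..132, yields
// min_xht = 116 * 128 / 132 ~= 112 and max_xht = 116 * 128 / 124 ~= 120,
// i.e. a vote for an x-height slightly below the nominal 128.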
if (debug_x_ht_level >= 2) {
tprintf(" xht range min=%d, max=%d\n", min_xht, max_xht);
}
// The range of expected heights gets a vote equal to the distance
// of the actual top from the expected top.
for (int y = min_xht; y <= max_xht; ++y)
top_stats.add(y, misfit_dist);
}
else if ((min_bottom > bottom + x_ht_acceptance_tolerance ||
bottom - x_ht_acceptance_tolerance > max_bottom) &&
bottom_shift == 0) {
// Get the range of required bottom shift.
int min_shift = min_bottom - bottom;
int max_shift = max_bottom - bottom;
if (debug_x_ht_level >= 2) {
tprintf(" bottom shift min=%d, max=%d\n", min_shift, max_shift);
}
// The range of expected shifts gets a vote equal to the min distance
// of the actual bottom from the expected bottom, spread over the
// range of its acceptance.
int misfit_weight = abs(min_shift);
if (max_shift > min_shift)
misfit_weight /= max_shift - min_shift;
for (int y = min_shift; y <= max_shift; ++y)
shift_stats.add(y, misfit_weight);
}
else {
if (bottom_shift == 0) {
// Things with bottoms that are already ok need to say so, on the
// 1st iteration only.
shift_stats.add(0, kBlnBaselineOffset);
}
if (debug_x_ht_level >= 2) {
tprintf(" already OK\n");
}
}
}
}
if (shift_stats.get_total() > top_stats.get_total()) {
bottom_shift = IntCastRounded(shift_stats.median());
if (debug_x_ht_level >= 2) {
tprintf("Applying bottom shift=%d\n", bottom_shift);
}
}
} while (bottom_shift != 0 &&
top_stats.get_total() < shift_stats.get_total());
// Baseline shift is opposite sign to the bottom shift.
*baseline_shift = -bottom_shift / word_res->denorm.y_scale();
if (debug_x_ht_level >= 2) {
tprintf("baseline shift=%g\n", *baseline_shift);
}
if (top_stats.get_total() == 0)
return bottom_shift != 0 ? word_res->x_height : 0.0f;
// The new xheight is just the median vote, which is then scaled out
// of BLN space back to pixel space to get the x-height in pixel space.
float new_xht = top_stats.median();
if (debug_x_ht_level >= 2) {
tprintf("Median xht=%f\n", new_xht);
tprintf("Mode20:A: New x-height = %f (norm), %f (orig)\n",
new_xht, new_xht / word_res->denorm.y_scale());
}
// The xheight must change by at least x_ht_min_change to be used.
if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change)
return new_xht / word_res->denorm.y_scale();
else
return bottom_shift != 0 ? word_res->x_height : 0.0f;
}
} // namespace tesseract

View File

@ -0,0 +1,390 @@
///////////////////////////////////////////////////////////////////////
// File: ltrresultiterator.cpp
// Description: Iterator for tesseract results in strict left-to-right
// order that avoids using tesseract internal data structures.
// Author: Ray Smith
// Created: Fri Feb 26 14:32:09 PST 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "ltrresultiterator.h"
#include "allheaders.h"
#include "pageres.h"
#include "strngs.h"
#include "tesseractclass.h"
namespace tesseract {
LTRResultIterator::LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract,
int scale, int scaled_yres,
int rect_left, int rect_top,
int rect_width, int rect_height)
: PageIterator(page_res, tesseract, scale, scaled_yres,
rect_left, rect_top, rect_width, rect_height),
line_separator_("\n"),
paragraph_separator_("\n") {
}
LTRResultIterator::~LTRResultIterator() {
}
// Returns the null terminated UTF-8 encoded text string for the current
// object at the given level. Use delete [] to free after use.
char* LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
if (it_->word() == NULL) return NULL; // Already at the end!
STRING text;
PAGE_RES_IT res_it(*it_);
WERD_CHOICE* best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
if (level == RIL_SYMBOL) {
text = res_it.word()->BestUTF8(blob_index_, false);
}
else if (level == RIL_WORD) {
text = best_choice->unichar_string();
}
else {
bool eol = false; // end of line?
bool eop = false; // end of paragraph?
do { // for each paragraph in a block
do { // for each text line in a paragraph
do { // for each word in a text line
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
text += best_choice->unichar_string();
text += " ";
res_it.forward();
eol = res_it.row() != res_it.prev_row();
} while (!eol);
text.truncate_at(text.length() - 1);
text += line_separator_;
eop = res_it.block() != res_it.prev_block() ||
res_it.row()->row->para() != res_it.prev_row()->row->para();
} while (level != RIL_TEXTLINE && !eop);
if (eop) text += paragraph_separator_;
} while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
}
int length = text.length() + 1;
char* result = new char[length];
strncpy(result, text.string(), length);
return result;
}
// Set the string inserted at the end of each text line. "\n" by default.
void LTRResultIterator::SetLineSeparator(const char *new_line) {
line_separator_ = new_line;
}
// Set the string inserted at the end of each paragraph. "\n" by default.
void LTRResultIterator::SetParagraphSeparator(const char *new_para) {
paragraph_separator_ = new_para;
}
// Returns the mean confidence of the current object at the given level.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float LTRResultIterator::Confidence(PageIteratorLevel level) const {
if (it_->word() == NULL) return 0.0f; // Already at the end!
float mean_certainty = 0.0f;
int certainty_count = 0;
PAGE_RES_IT res_it(*it_);
WERD_CHOICE* best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
switch (level) {
case RIL_BLOCK:
do {
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
mean_certainty += best_choice->certainty();
++certainty_count;
res_it.forward();
} while (res_it.block() == res_it.prev_block());
break;
case RIL_PARA:
do {
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
mean_certainty += best_choice->certainty();
++certainty_count;
res_it.forward();
} while (res_it.block() == res_it.prev_block() &&
res_it.row()->row->para() == res_it.prev_row()->row->para());
break;
case RIL_TEXTLINE:
do {
best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != NULL);
mean_certainty += best_choice->certainty();
++certainty_count;
res_it.forward();
} while (res_it.row() == res_it.prev_row());
break;
case RIL_WORD:
mean_certainty += best_choice->certainty();
++certainty_count;
break;
case RIL_SYMBOL:
mean_certainty += best_choice->certainty(blob_index_);
++certainty_count;
}
if (certainty_count > 0) {
mean_certainty /= certainty_count;
float confidence = 100 + 5 * mean_certainty;
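// For example, a mean certainty of -4 maps to 100 + 5 * (-4) = 80; the
// clamps below keep the result in the documented 0.0f-100.0f range.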
if (confidence < 0.0f) confidence = 0.0f;
if (confidence > 100.0f) confidence = 100.0f;
return confidence;
}
return 0.0f;
}
void LTRResultIterator::RowAttributes(float* row_height, float* descenders,
float* ascenders) const {
*row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() -
it_->row()->row->descenders();
*descenders = it_->row()->row->descenders();
*ascenders = it_->row()->row->ascenders();
}
// Returns the font attributes of the current word. If iterating at a higher
// level object than words, eg textlines, then this will return the
// attributes of the first word in that textline.
// The actual return value is a string representing a font name. It points
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
// the iterator itself, ie rendered invalid by various members of
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
// Pointsize is returned in printers points (1/72 inch.)
const char* LTRResultIterator::WordFontAttributes(bool* is_bold,
bool* is_italic,
bool* is_underlined,
bool* is_monospace,
bool* is_serif,
bool* is_smallcaps,
int* pointsize,
int* font_id) const {
if (it_->word() == NULL) return NULL; // Already at the end!
if (it_->word()->fontinfo == NULL) {
*font_id = -1;
return NULL; // No font information.
}
const FontInfo& font_info = *it_->word()->fontinfo;
*font_id = font_info.universal_id;
*is_bold = font_info.is_bold();
*is_italic = font_info.is_italic();
*is_underlined = false; // TODO(rays) fix this!
*is_monospace = font_info.is_fixed_pitch();
*is_serif = font_info.is_serif();
*is_smallcaps = it_->word()->small_caps;
float row_height = it_->row()->row->x_height() +
it_->row()->row->ascenders() - it_->row()->row->descenders();
// Convert from pixels to printers points.
*pointsize = scaled_yres_ > 0
? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
: 0;
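// For example, a 50-pixel row at an effective resolution of 300 dpi gives
// 50 * 72 / 300 = 12 points (taking kPointsPerInch to be 72, per the
// "printers points (1/72 inch)" note above).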
return font_info.name;
}
// Returns the name of the language used to recognize this word.
const char* LTRResultIterator::WordRecognitionLanguage() const {
if (it_->word() == NULL || it_->word()->tesseract == NULL) return NULL;
return it_->word()->tesseract->lang.string();
}
// Return the overall directionality of this word.
StrongScriptDirection LTRResultIterator::WordDirection() const {
if (it_->word() == NULL) return DIR_NEUTRAL;
bool has_rtl = it_->word()->AnyRtlCharsInWord();
bool has_ltr = it_->word()->AnyLtrCharsInWord();
if (has_rtl && !has_ltr)
return DIR_RIGHT_TO_LEFT;
if (has_ltr && !has_rtl)
return DIR_LEFT_TO_RIGHT;
if (!has_ltr && !has_rtl)
return DIR_NEUTRAL;
return DIR_MIX;
}
// Returns true if the current word was found in a dictionary.
bool LTRResultIterator::WordIsFromDictionary() const {
if (it_->word() == NULL) return false; // Already at the end!
int permuter = it_->word()->best_choice->permuter();
return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM ||
permuter == USER_DAWG_PERM;
}
// Returns true if the current word is numeric.
bool LTRResultIterator::WordIsNumeric() const {
if (it_->word() == NULL) return false; // Already at the end!
int permuter = it_->word()->best_choice->permuter();
return permuter == NUMBER_PERM;
}
// Returns true if the word contains blamer information.
bool LTRResultIterator::HasBlamerInfo() const {
return it_->word() != NULL && it_->word()->blamer_bundle != NULL &&
it_->word()->blamer_bundle->HasDebugInfo();
}
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
// of the current word.
const void *LTRResultIterator::GetParamsTrainingBundle() const {
return (it_->word() != NULL && it_->word()->blamer_bundle != NULL) ?
&(it_->word()->blamer_bundle->params_training_bundle()) : NULL;
}
// Returns the pointer to the string with blamer information for this word.
// Assumes that the word's blamer_bundle is not NULL.
const char *LTRResultIterator::GetBlamerDebug() const {
return it_->word()->blamer_bundle->debug().string();
}
// Returns the pointer to the string with misadaption information for this word.
// Assumes that the word's blamer_bundle is not NULL.
const char *LTRResultIterator::GetBlamerMisadaptionDebug() const {
return it_->word()->blamer_bundle->misadaption_debug().string();
}
// Returns true if a truth string was recorded for the current word.
bool LTRResultIterator::HasTruthString() const {
if (it_->word() == NULL) return false; // Already at the end!
if (it_->word()->blamer_bundle == NULL ||
it_->word()->blamer_bundle->NoTruth()) {
return false; // no truth information for this word
}
return true;
}
// Returns true if the given string is equivalent to the truth string for
// the current word.
bool LTRResultIterator::EquivalentToTruth(const char *str) const {
if (!HasTruthString()) return false;
ASSERT_HOST(it_->word()->uch_set != NULL);
WERD_CHOICE str_wd(str, *(it_->word()->uch_set));
return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);
}
// Returns the null terminated UTF-8 encoded truth string for the current word.
// Use delete [] to free after use.
char* LTRResultIterator::WordTruthUTF8Text() const {
if (!HasTruthString()) return NULL;
STRING truth_text = it_->word()->blamer_bundle->TruthString();
int length = truth_text.length() + 1;
char* result = new char[length];
strncpy(result, truth_text.string(), length);
return result;
}
// Returns the null terminated UTF-8 encoded normalized OCR string for the
// current word. Use delete [] to free after use.
char* LTRResultIterator::WordNormedUTF8Text() const {
if (it_->word() == NULL) return NULL; // Already at the end!
STRING ocr_text;
WERD_CHOICE* best_choice = it_->word()->best_choice;
const UNICHARSET *unicharset = it_->word()->uch_set;
ASSERT_HOST(best_choice != NULL);
for (int i = 0; i < best_choice->length(); ++i) {
ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
}
int length = ocr_text.length() + 1;
char* result = new char[length];
strncpy(result, ocr_text.string(), length);
return result;
}
// Returns a pointer to serialized choice lattice.
// Fills lattice_size with the number of bytes in lattice data.
const char *LTRResultIterator::WordLattice(int *lattice_size) const {
if (it_->word() == NULL) return NULL; // Already at the end!
if (it_->word()->blamer_bundle == NULL) return NULL;
*lattice_size = it_->word()->blamer_bundle->lattice_size();
return it_->word()->blamer_bundle->lattice_data();
}
// Returns true if the current symbol is a superscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool LTRResultIterator::SymbolIsSuperscript() const {
if (cblob_it_ == NULL && it_->word() != NULL)
return it_->word()->best_choice->BlobPosition(blob_index_) ==
SP_SUPERSCRIPT;
return false;
}
// Returns true if the current symbol is a subscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool LTRResultIterator::SymbolIsSubscript() const {
if (cblob_it_ == NULL && it_->word() != NULL)
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
return false;
}
// Returns true if the current symbol is a dropcap.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool LTRResultIterator::SymbolIsDropcap() const {
if (cblob_it_ == NULL && it_->word() != NULL)
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
return false;
}
ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) {
ASSERT_HOST(result_it.it_->word() != NULL);
word_res_ = result_it.it_->word();
BLOB_CHOICE_LIST* choices = NULL;
if (word_res_->ratings != NULL)
choices = word_res_->GetBlobChoices(result_it.blob_index_);
if (choices != NULL && !choices->empty()) {
choice_it_ = new BLOB_CHOICE_IT(choices);
choice_it_->mark_cycle_pt();
}
else {
choice_it_ = NULL;
}
}
ChoiceIterator::~ChoiceIterator() {
delete choice_it_;
}
// Moves to the next choice for the symbol and returns false if there
// are none left.
bool ChoiceIterator::Next() {
if (choice_it_ == NULL)
return false;
choice_it_->forward();
return !choice_it_->cycled_list();
}
// Returns the null terminated UTF-8 encoded text string for the current
// choice. Do NOT use delete [] to free after use.
const char* ChoiceIterator::GetUTF8Text() const {
if (choice_it_ == NULL)
return NULL;
UNICHAR_ID id = choice_it_->data()->unichar_id();
return word_res_->uch_set->id_to_unichar_ext(id);
}
// Returns the confidence of the current choice.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float ChoiceIterator::Confidence() const {
if (choice_it_ == NULL)
return 0.0f;
float confidence = 100 + 5 * choice_it_->data()->certainty();
if (confidence < 0.0f) confidence = 0.0f;
if (confidence > 100.0f) confidence = 100.0f;
return confidence;
}
} // namespace tesseract.

View File

@ -0,0 +1,218 @@
///////////////////////////////////////////////////////////////////////
// File: ltrresultiterator.h
// Description: Iterator for tesseract results in strict left-to-right
// order that avoids using tesseract internal data structures.
// Author: Ray Smith
// Created: Fri Feb 26 11:01:06 PST 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__
#include "platform.h"
#include "pageiterator.h"
#include "unichar.h"
class BLOB_CHOICE_IT;
class WERD_RES;
namespace tesseract {
class Tesseract;
// Class to iterate over tesseract results, providing access to all levels
// of the page hierarchy, without including any tesseract headers or having
// to handle any tesseract structures.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES.
// See apitypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface.
// LTRResultIterator adds text-specific methods for access to OCR output.
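// A minimal usage sketch (illustrative only; it assumes "it" was obtained
// from a live TessBaseAPI, e.g. via its GetIterator() accessor, which
// returns a subclass of this iterator):
//   do {
//     char* word = it->GetUTF8Text(RIL_WORD);
//     float conf = it->Confidence(RIL_WORD);
//     // ... use word and conf ...
//     delete [] word;
//   } while (it->Next(RIL_WORD));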
class TESS_API LTRResultIterator : public PageIterator {
friend class ChoiceIterator;
public:
// page_res and tesseract come directly from the BaseAPI.
// The rectangle parameters are copied indirectly from the Thresholder,
// via the BaseAPI. They represent the coordinates of some rectangle in an
// original image (in top-left-origin coordinates) and therefore the top-left
// needs to be added to any output boxes in order to specify coordinates
// in the original image. See TessBaseAPI::SetRectangle.
// The scale and scaled_yres are in case the Thresholder scaled the image
// rectangle prior to thresholding. Any coordinates in tesseract's image
// must be divided by scale before adding (rect_left, rect_top).
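// For example, a point (x, y) in tesseract's (possibly scaled) image maps to
// (x / scale + rect_left, y / scale + rect_top) in the original image.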
// The scaled_yres indicates the effective resolution of the binary image
// that tesseract has been given by the Thresholder.
// After the constructor, Begin has already been called.
LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract,
int scale, int scaled_yres,
int rect_left, int rect_top,
int rect_width, int rect_height);
virtual ~LTRResultIterator();
// LTRResultIterators may be copied! This makes it possible to iterate over
// all the objects at a lower level, while maintaining an iterator to
// objects at a higher level. These constructors DO NOT CALL Begin, so
// iterations will continue from the location of src.
// TODO: For now the copy constructor and operator= only need the base class
// versions, but if new data members are added, don't forget to add them!
// ============= Moving around within the page ============.
// See PageIterator.
// ============= Accessing data ==============.
// Returns the null terminated UTF-8 encoded text string for the current
// object at the given level. Use delete [] to free after use.
char* GetUTF8Text(PageIteratorLevel level) const;
// Set the string inserted at the end of each text line. "\n" by default.
void SetLineSeparator(const char *new_line);
// Set the string inserted at the end of each paragraph. "\n" by default.
void SetParagraphSeparator(const char *new_para);
// Returns the mean confidence of the current object at the given level.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float Confidence(PageIteratorLevel level) const;
// Returns the attributes of the current row.
void RowAttributes(float* row_height, float* descenders,
float* ascenders) const;
// ============= Functions that refer to words only ============.
// Returns the font attributes of the current word. If iterating at a higher
// level object than words, eg textlines, then this will return the
// attributes of the first word in that textline.
// The actual return value is a string representing a font name. It points
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
// the iterator itself, ie rendered invalid by various members of
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
// Pointsize is returned in printers points (1/72 inch.)
const char* WordFontAttributes(bool* is_bold,
bool* is_italic,
bool* is_underlined,
bool* is_monospace,
bool* is_serif,
bool* is_smallcaps,
int* pointsize,
int* font_id) const;
// Return the name of the language used to recognize this word.
// On error, NULL. Do not delete this pointer.
const char* WordRecognitionLanguage() const;
// Return the overall directionality of this word.
StrongScriptDirection WordDirection() const;
// Returns true if the current word was found in a dictionary.
bool WordIsFromDictionary() const;
// Returns true if the current word is numeric.
bool WordIsNumeric() const;
// Returns true if the word contains blamer information.
bool HasBlamerInfo() const;
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
// of the current word.
const void *GetParamsTrainingBundle() const;
// Returns a pointer to the string with blamer information for this word.
// Assumes that the word's blamer_bundle is not NULL.
const char *GetBlamerDebug() const;
// Returns a pointer to the string with misadaption information for this word.
// Assumes that the word's blamer_bundle is not NULL.
const char *GetBlamerMisadaptionDebug() const;
// Returns true if a truth string was recorded for the current word.
bool HasTruthString() const;
// Returns true if the given string is equivalent to the truth string for
// the current word.
bool EquivalentToTruth(const char *str) const;
// Returns a null terminated UTF-8 encoded truth string for the current word.
// Use delete [] to free after use.
char* WordTruthUTF8Text() const;
// Returns a null terminated UTF-8 encoded normalized OCR string for the
// current word. Use delete [] to free after use.
char* WordNormedUTF8Text() const;
// Returns a pointer to serialized choice lattice.
// Fills lattice_size with the number of bytes in lattice data.
const char *WordLattice(int *lattice_size) const;
// ============= Functions that refer to symbols only ============.
// Returns true if the current symbol is a superscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSuperscript() const;
// Returns true if the current symbol is a subscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSubscript() const;
// Returns true if the current symbol is a dropcap.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsDropcap() const;
protected:
const char *line_separator_;
const char *paragraph_separator_;
};
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
class ChoiceIterator {
public:
// Construction is from a LTRResultIterator that points to the symbol of
// interest. The ChoiceIterator allows a one-shot iteration over the
// choices for this symbol and after that it is useless.
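// A minimal usage sketch (illustrative), with symbol_it being an
// LTRResultIterator positioned on the symbol of interest:
//   ChoiceIterator ci(symbol_it);
//   do {
//     const char* ch = ci.GetUTF8Text();
//     float conf = ci.Confidence();
//     // ... use ch and conf ...
//   } while (ci.Next());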
explicit ChoiceIterator(const LTRResultIterator& result_it);
~ChoiceIterator();
// Moves to the next choice for the symbol and returns false if there
// are none left.
bool Next();
// ============= Accessing data ==============.
// Returns the null terminated UTF-8 encoded text string for the current
// choice.
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
// internal structure and should NOT be delete[]ed to free after use.
const char* GetUTF8Text() const;
// Returns the confidence of the current choice.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float Confidence() const;
private:
// Pointer to the WERD_RES object owned by the API.
WERD_RES* word_res_;
// Iterator over the blob choices.
BLOB_CHOICE_IT* choice_it_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__

View File

@ -0,0 +1,38 @@
///////////////////////////////////////////////////////////////////////
// File: mathfix.h
// Description: Implement missing math functions
// Author: zdenop
// Created: Fri Feb 03 06:45:06 CET 2012
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef VS2008_INCLUDE_MATHFIX_H_
#define VS2008_INCLUDE_MATHFIX_H_
#ifndef _MSC_VER
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif
#include <math.h>
#include <float.h> // for _isnan(), _finite() on VC++
#if _MSC_VER < 1800
#define isnan(x) _isnan(x)
#define isinf(x) (!_finite(x))
#define fmax max //VC++ does not implement all the provisions of C99 Standard
#define round(x) roundf(x)
inline float roundf(float num) { return num > 0 ? floorf(num + 0.5f) : ceilf(num - 0.5f); }
#endif
#endif // VS2008_INCLUDE_MATHFIX_H_

View File

@ -0,0 +1,64 @@
///////////////////////////////////////////////////////////////////////
// File: mutableiterator.h
// Description: Iterator for tesseract results providing access to
// both high-level API and Tesseract internal data structures.
// Author: David Eger
// Created: Thu Feb 24 19:01:06 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_MUTABLEITERATOR_H__
#define TESSERACT_CCMAIN_MUTABLEITERATOR_H__
#include "resultiterator.h"
class BLOB_CHOICE_IT;
namespace tesseract {
class Tesseract;
// Class to iterate over tesseract results, providing access to all levels
// of the page hierarchy, without including any tesseract headers or having
// to handle any tesseract structures.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES.
// See apitypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface.
// ResultIterator adds text-specific methods for access to OCR output.
// MutableIterator adds access to internal data structures.
class MutableIterator : public ResultIterator {
public:
// See argument descriptions in ResultIterator()
MutableIterator(PAGE_RES* page_res, Tesseract* tesseract,
int scale, int scaled_yres,
int rect_left, int rect_top,
int rect_width, int rect_height)
: ResultIterator(
LTRResultIterator(page_res, tesseract, scale, scaled_yres, rect_left,
rect_top, rect_width, rect_height)) {}
virtual ~MutableIterator() {}
// See PageIterator and ResultIterator for most calls.
// Return access to Tesseract internals.
const PAGE_RES_IT *PageResIt() const { return it_; }
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_MUTABLEITERATOR_H__

View File

@ -0,0 +1,585 @@
///////////////////////////////////////////////////////////////////////
// File: osdetect.cpp
// Description: Orientation and script detection.
// Author: Samuel Charron
// Ranjith Unnikrishnan
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "osdetect.h"
#include "blobbox.h"
#include "blread.h"
#include "colfind.h"
#include "fontinfo.h"
#include "imagefind.h"
#include "linefind.h"
#include "oldlist.h"
#include "qrsequence.h"
#include "ratngs.h"
#include "strngs.h"
#include "tabvector.h"
#include "tesseractclass.h"
#include "textord.h"
const int kMinCharactersToTry = 20;
const int kMaxCharactersToTry = 5 * kMinCharactersToTry;
const float kSizeRatioToReject = 2.0;
const int kMinAcceptableBlobHeight = 10;
const float kScriptAcceptRatio = 1.3;
const float kHanRatioInKorean = 0.7;
const float kHanRatioInJapanese = 0.3;
const float kNonAmbiguousMargin = 1.0;
// General scripts
static const char* han_script = "Han";
static const char* latin_script = "Latin";
static const char* katakana_script = "Katakana";
static const char* hiragana_script = "Hiragana";
static const char* hangul_script = "Hangul";
// Pseudo-scripts Name
const char* ScriptDetector::korean_script_ = "Korean";
const char* ScriptDetector::japanese_script_ = "Japanese";
const char* ScriptDetector::fraktur_script_ = "Fraktur";
// Minimum believable resolution.
const int kMinCredibleResolution = 70;
void OSResults::update_best_orientation() {
float first = orientations[0];
float second = orientations[1];
best_result.orientation_id = 0;
if (orientations[0] < orientations[1]) {
first = orientations[1];
second = orientations[0];
best_result.orientation_id = 1;
}
for (int i = 2; i < 4; ++i) {
if (orientations[i] > first) {
second = first;
first = orientations[i];
best_result.orientation_id = i;
}
else if (orientations[i] > second) {
second = orientations[i];
}
}
// Store difference of top two orientation scores.
best_result.oconfidence = first - second;
}
void OSResults::set_best_orientation(int orientation_id) {
best_result.orientation_id = orientation_id;
best_result.oconfidence = 0;
}
void OSResults::update_best_script(int orientation) {
// We skip index 0 to ignore the "Common" script.
float first = scripts_na[orientation][1];
float second = scripts_na[orientation][2];
best_result.script_id = 1;
if (scripts_na[orientation][1] < scripts_na[orientation][2]) {
first = scripts_na[orientation][2];
second = scripts_na[orientation][1];
best_result.script_id = 2;
}
for (int i = 3; i < kMaxNumberOfScripts; ++i) {
if (scripts_na[orientation][i] > first) {
best_result.script_id = i;
second = first;
first = scripts_na[orientation][i];
}
else if (scripts_na[orientation][i] > second) {
second = scripts_na[orientation][i];
}
}
best_result.sconfidence =
(first / second - 1.0) / (kScriptAcceptRatio - 1.0);
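// For example, with kScriptAcceptRatio = 1.3 the best script must outscore
// the runner-up by more than a factor of 1.3 before sconfidence exceeds 1,
// the threshold used in ScriptDetector::must_stop.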
}
int OSResults::get_best_script(int orientation_id) const {
int max_id = -1;
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
const char *script = unicharset->get_script_from_script_id(j);
if (strcmp(script, "Common") && strcmp(script, "NULL")) {
if (max_id == -1 ||
scripts_na[orientation_id][j] > scripts_na[orientation_id][max_id])
max_id = j;
}
}
return max_id;
}
// Print the script scores for all possible orientations.
void OSResults::print_scores(void) const {
for (int i = 0; i < 4; ++i) {
tprintf("Orientation id #%d", i);
print_scores(i);
}
}
// Print the script scores for the given candidate orientation.
void OSResults::print_scores(int orientation_id) const {
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
if (scripts_na[orientation_id][j]) {
tprintf("%12s\t: %f\n", unicharset->get_script_from_script_id(j),
scripts_na[orientation_id][j]);
}
}
}
// Accumulate scores with given OSResults instance and update the best script.
void OSResults::accumulate(const OSResults& osr) {
for (int i = 0; i < 4; ++i) {
orientations[i] += osr.orientations[i];
for (int j = 0; j < kMaxNumberOfScripts; ++j)
scripts_na[i][j] += osr.scripts_na[i][j];
}
unicharset = osr.unicharset;
update_best_orientation();
update_best_script(best_result.orientation_id);
}
// Detect and erase horizontal/vertical lines and picture regions from the
// image, so that non-text blobs are removed from consideration.
void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks,
TO_BLOCK_LIST *to_blocks) {
Pix *pix = tess->pix_binary();
ASSERT_HOST(pix != NULL);
int vertical_x = 0;
int vertical_y = 1;
tesseract::TabVector_LIST v_lines;
tesseract::TabVector_LIST h_lines;
int resolution;
if (kMinCredibleResolution > pixGetXRes(pix)) {
resolution = kMinCredibleResolution;
tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n",
pixGetXRes(pix), resolution);
}
else {
resolution = pixGetXRes(pix);
}
tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix,
&vertical_x, &vertical_y,
NULL, &v_lines, &h_lines);
Pix* im_pix = tesseract::ImageFind::FindImages(pix);
if (im_pix != NULL) {
pixSubtract(pix, pix, im_pix);
pixDestroy(&im_pix);
}
tess->mutable_textord()->find_components(tess->pix_binary(),
blocks, to_blocks);
}
// Find connected components in the page and process a subset until finished or
// a stopping criterion is met.
// Returns the number of blobs used in making the estimate. 0 implies failure.
int orientation_and_script_detection(STRING& filename,
OSResults* osr,
tesseract::Tesseract* tess) {
STRING name = filename; //truncated name
const char *lastdot; //of name
TBOX page_box;
lastdot = strrchr(name.string(), '.');
if (lastdot != NULL)
name[lastdot - name.string()] = '\0';
ASSERT_HOST(tess->pix_binary() != NULL);
int width = pixGetWidth(tess->pix_binary());
int height = pixGetHeight(tess->pix_binary());
BLOCK_LIST blocks;
if (!read_unlv_file(name, width, height, &blocks))
FullPageBlock(width, height, &blocks);
// Try to remove non-text regions from consideration.
TO_BLOCK_LIST land_blocks, port_blocks;
remove_nontext_regions(tess, &blocks, &port_blocks);
if (port_blocks.empty()) {
// page segmentation did not succeed, so we need to find_components first.
tess->mutable_textord()->find_components(tess->pix_binary(),
&blocks, &port_blocks);
}
else {
page_box.set_left(0);
page_box.set_bottom(0);
page_box.set_right(width);
page_box.set_top(height);
// Filter_blobs sets up the TO_BLOCKs the same as find_components does.
tess->mutable_textord()->filter_blobs(page_box.topright(),
&port_blocks, true);
}
return os_detect(&port_blocks, osr, tess);
}
// Filter and sample the blobs.
// Returns a non-zero number of blobs if the page was successfully processed, or
// zero if the page had too few characters to be reliable
int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr,
tesseract::Tesseract* tess) {
int blobs_total = 0;
TO_BLOCK_IT block_it;
block_it.set_to_list(port_blocks);
BLOBNBOX_CLIST filtered_list;
BLOBNBOX_C_IT filtered_it(&filtered_list);
for (block_it.mark_cycle_pt(); !block_it.cycled_list();
block_it.forward()) {
TO_BLOCK* to_block = block_it.data();
if (to_block->block->poly_block() &&
!to_block->block->poly_block()->IsText()) continue;
BLOBNBOX_IT bbox_it;
bbox_it.set_to_list(&to_block->blobs);
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list();
bbox_it.forward()) {
BLOBNBOX* bbox = bbox_it.data();
C_BLOB* blob = bbox->cblob();
TBOX box = blob->bounding_box();
++blobs_total;
float y_x = fabs((box.height() * 1.0) / box.width());
float x_y = 1.0f / y_x;
// Select a >= 1.0 ratio
float ratio = x_y > y_x ? x_y : y_x;
// Blob is ambiguous
if (ratio > kSizeRatioToReject) continue;
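// For example, a blob 5 pixels wide and 20 pixels tall has ratio 4.0, which
// exceeds kSizeRatioToReject (2.0), so it is skipped as ambiguous.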
if (box.height() < kMinAcceptableBlobHeight) continue;
filtered_it.add_to_end(bbox);
}
}
return os_detect_blobs(NULL, &filtered_list, osr, tess);
}
// Detect orientation and script from a list of blobs.
// Returns a non-zero number of blobs if the list was successfully processed, or
// zero if the list had too few characters to be reliable.
// If allowed_scripts is non-null and non-empty, it is a list of scripts that
// constrains both orientation and script detection to consider only scripts
// from the list.
int os_detect_blobs(const GenericVector<int>* allowed_scripts,
BLOBNBOX_CLIST* blob_list, OSResults* osr,
tesseract::Tesseract* tess) {
OSResults osr_;
if (osr == NULL)
osr = &osr_;
osr->unicharset = &tess->unicharset;
OrientationDetector o(allowed_scripts, osr);
ScriptDetector s(allowed_scripts, osr, tess);
BLOBNBOX_C_IT filtered_it(blob_list);
int real_max = MIN(filtered_it.length(), kMaxCharactersToTry);
// tprintf("Total blobs found = %d\n", blobs_total);
// tprintf("Number of blobs post-filtering = %d\n", filtered_it.length());
// tprintf("Number of blobs to try = %d\n", real_max);
// If there are too few characters, skip this page entirely.
if (real_max < kMinCharactersToTry / 2) {
tprintf("Too few characters. Skipping this page\n");
return 0;
}
BLOBNBOX** blobs = new BLOBNBOX*[filtered_it.length()];
int number_of_blobs = 0;
for (filtered_it.mark_cycle_pt(); !filtered_it.cycled_list();
filtered_it.forward()) {
blobs[number_of_blobs++] = (BLOBNBOX*)filtered_it.data();
}
QRSequenceGenerator sequence(number_of_blobs);
int num_blobs_evaluated = 0;
for (int i = 0; i < real_max; ++i) {
if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess)
&& i > kMinCharactersToTry) {
break;
}
++num_blobs_evaluated;
}
delete[] blobs;
// Make sure the best_result is up-to-date
int orientation = o.get_orientation();
osr->update_best_script(orientation);
return num_blobs_evaluated;
}
// Processes a single blob to estimate script and orientation.
// Return true if estimate of orientation and script satisfies stopping
// criteria.
bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
ScriptDetector* s, OSResults* osr,
tesseract::Tesseract* tess) {
tess->tess_cn_matching.set_value(true); // turn it on
tess->tess_bn_matching.set_value(false);
C_BLOB* blob = bbox->cblob();
TBLOB* tblob = TBLOB::PolygonalCopy(tess->poly_allow_detailed_fx, blob);
TBOX box = tblob->bounding_box();
FCOORD current_rotation(1.0f, 0.0f);
FCOORD rotation90(0.0f, 1.0f);
BLOB_CHOICE_LIST ratings[4];
// Test the 4 orientations
for (int i = 0; i < 4; ++i) {
// Normalize the blob. Set the origin to the place we want to be the
// bottom-middle after rotation.
// Scaling is to make the rotated height the x-height.
float scaling = static_cast<float>(kBlnXHeight) / box.height();
float x_origin = (box.left() + box.right()) / 2.0f;
float y_origin = (box.bottom() + box.top()) / 2.0f;
if (i == 0 || i == 2) {
// Rotation is 0 or 180.
y_origin = i == 0 ? box.bottom() : box.top();
}
else {
// Rotation is 90 or 270.
scaling = static_cast<float>(kBlnXHeight) / box.width();
x_origin = i == 1 ? box.left() : box.right();
}
TBLOB* rotated_blob = new TBLOB(*tblob);
rotated_blob->Normalize(NULL, &current_rotation, NULL,
x_origin, y_origin, scaling, scaling,
0.0f, static_cast<float>(kBlnBaselineOffset),
false, NULL);
tess->AdaptiveClassifier(rotated_blob, ratings + i);
delete rotated_blob;
current_rotation.rotate(rotation90);
}
delete tblob;
bool stop = o->detect_blob(ratings);
s->detect_blob(ratings);
int orientation = o->get_orientation();
stop = s->must_stop(orientation) && stop;
return stop;
}
OrientationDetector::OrientationDetector(
const GenericVector<int>* allowed_scripts, OSResults* osr) {
osr_ = osr;
allowed_scripts_ = allowed_scripts;
}
// Score the given blob and return true if it is now sure of the orientation
// after adding this block.
bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
float blob_o_score[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
float total_blob_o_score = 0.0f;
for (int i = 0; i < 4; ++i) {
BLOB_CHOICE_IT choice_it(scores + i);
if (!choice_it.empty()) {
BLOB_CHOICE* choice = NULL;
if (allowed_scripts_ != NULL && !allowed_scripts_->empty()) {
// Find the top choice in an allowed script.
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list() &&
choice == NULL; choice_it.forward()) {
int choice_script = choice_it.data()->script_id();
int s = 0;
for (s = 0; s < allowed_scripts_->size(); ++s) {
if ((*allowed_scripts_)[s] == choice_script) {
choice = choice_it.data();
break;
}
}
}
}
else {
choice = choice_it.data();
}
if (choice != NULL) {
// The certainty score ranges between [-20,0]. This is converted here to
// [0,1], with 1 indicating best match.
blob_o_score[i] = 1 + 0.05 * choice->certainty();
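// For example, a certainty of -5 maps to 1 + 0.05 * (-5) = 0.75.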
total_blob_o_score += blob_o_score[i];
}
}
}
if (total_blob_o_score == 0.0) return false;
// Fill in any blanks with the worst score of the others. This is better than
// picking an arbitrary probability for it and way better than -inf.
float worst_score = 0.0f;
int num_good_scores = 0;
for (int i = 0; i < 4; ++i) {
if (blob_o_score[i] > 0.0f) {
++num_good_scores;
if (worst_score == 0.0f || blob_o_score[i] < worst_score)
worst_score = blob_o_score[i];
}
}
if (num_good_scores == 1) {
// Lower worst if there is only one.
worst_score /= 2.0f;
}
for (int i = 0; i < 4; ++i) {
if (blob_o_score[i] == 0.0f) {
blob_o_score[i] = worst_score;
total_blob_o_score += worst_score;
}
}
// Normalize the orientation scores for the blob and use them to
// update the aggregated orientation score.
for (int i = 0; total_blob_o_score != 0 && i < 4; ++i) {
osr_->orientations[i] += log(blob_o_score[i] / total_blob_o_score);
}
// TODO(ranjith) Add an early exit test, based on min_orientation_margin,
// as used in pagesegmain.cpp.
return false;
}
int OrientationDetector::get_orientation() {
osr_->update_best_orientation();
return osr_->best_result.orientation_id;
}
ScriptDetector::ScriptDetector(const GenericVector<int>* allowed_scripts,
OSResults* osr, tesseract::Tesseract* tess) {
osr_ = osr;
tess_ = tess;
allowed_scripts_ = allowed_scripts;
katakana_id_ = tess_->unicharset.add_script(katakana_script);
hiragana_id_ = tess_->unicharset.add_script(hiragana_script);
han_id_ = tess_->unicharset.add_script(han_script);
hangul_id_ = tess_->unicharset.add_script(hangul_script);
japanese_id_ = tess_->unicharset.add_script(japanese_script_);
korean_id_ = tess_->unicharset.add_script(korean_script_);
latin_id_ = tess_->unicharset.add_script(latin_script);
fraktur_id_ = tess_->unicharset.add_script(fraktur_script_);
}
// Score the given blob and return true if it is now sure of the script after
// adding this blob.
void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
bool done[kMaxNumberOfScripts];
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < kMaxNumberOfScripts; ++j)
done[j] = false;
BLOB_CHOICE_IT choice_it;
choice_it.set_to_list(scores + i);
float prev_score = -1;
int script_count = 0;
int prev_id = -1;
int prev_fontinfo_id = -1;
const char* prev_unichar = "";
const char* unichar = "";
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
choice_it.forward()) {
BLOB_CHOICE* choice = choice_it.data();
int id = choice->script_id();
if (allowed_scripts_ != NULL && !allowed_scripts_->empty()) {
// Check that the choice is in an allowed script.
int s = 0;
for (s = 0; s < allowed_scripts_->size(); ++s) {
if ((*allowed_scripts_)[s] == id) break;
}
if (s == allowed_scripts_->size()) continue; // Not found in list.
}
// Script already processed before.
if (done[id]) continue;
done[id] = true;
unichar = tess_->unicharset.id_to_unichar(choice->unichar_id());
// Save data from the first match
if (prev_score < 0) {
prev_score = -choice->certainty();
script_count = 1;
prev_id = id;
prev_unichar = unichar;
prev_fontinfo_id = choice->fontinfo_id();
}
else if (-choice->certainty() < prev_score + kNonAmbiguousMargin) {
++script_count;
}
if (strlen(prev_unichar) == 1)
if (unichar[0] >= '0' && unichar[0] <= '9')
break;
// if script_count is >= 2, character is ambiguous, skip other matches
// since they are useless.
if (script_count >= 2)
break;
}
// Character is non ambiguous
if (script_count == 1) {
// Update the score of the winning script
osr_->scripts_na[i][prev_id] += 1.0;
// Workaround for Fraktur
if (prev_id == latin_id_) {
if (prev_fontinfo_id >= 0) {
const tesseract::FontInfo &fi =
tess_->get_fontinfo_table().get(prev_fontinfo_id);
//printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name,
// fi.is_italic(), fi.is_bold(), fi.is_fixed_pitch(),
// fi.is_serif(), fi.is_fraktur(),
// prev_unichar);
if (fi.is_fraktur()) {
osr_->scripts_na[i][prev_id] -= 1.0;
osr_->scripts_na[i][fraktur_id_] += 1.0;
}
}
}
// Update Japanese / Korean pseudo-scripts
if (prev_id == katakana_id_)
osr_->scripts_na[i][japanese_id_] += 1.0;
if (prev_id == hiragana_id_)
osr_->scripts_na[i][japanese_id_] += 1.0;
if (prev_id == hangul_id_)
osr_->scripts_na[i][korean_id_] += 1.0;
if (prev_id == han_id_) {
osr_->scripts_na[i][korean_id_] += kHanRatioInKorean;
osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese;
}
}
} // iterate over each orientation
}
bool ScriptDetector::must_stop(int orientation) {
osr_->update_best_script(orientation);
return osr_->best_result.sconfidence > 1;
}
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be
// applied for the text to be upright (readable).
int OrientationIdToValue(const int& id) {
switch (id) {
case 0:
return 0;
case 1:
return 270;
case 2:
return 180;
case 3:
return 90;
default:
return -1;
}
}

View File

@ -0,0 +1,138 @@
///////////////////////////////////////////////////////////////////////
// File: osdetect.h
// Description: Orientation and script detection.
// Author: Samuel Charron
// Ranjith Unnikrishnan
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_OSDETECT_H__
#define TESSERACT_CCMAIN_OSDETECT_H__
#include "strngs.h"
#include "unicharset.h"
class TO_BLOCK_LIST;
class BLOBNBOX;
class BLOB_CHOICE_LIST;
class BLOBNBOX_CLIST;
namespace tesseract {
class Tesseract;
}
// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur
const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
struct OSBestResult {
OSBestResult() : orientation_id(0), script_id(0), sconfidence(0.0),
oconfidence(0.0) {}
int orientation_id;
int script_id;
float sconfidence;
float oconfidence;
};
struct OSResults {
OSResults() : unicharset(NULL) {
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < kMaxNumberOfScripts; ++j)
scripts_na[i][j] = 0;
orientations[i] = 0;
}
}
void update_best_orientation();
// Set the estimate of the orientation to the given id.
void set_best_orientation(int orientation_id);
// Update/Compute the best estimate of the script assuming the given
// orientation id.
void update_best_script(int orientation_id);
// Return the index of the script with the highest score for this orientation.
TESS_API int get_best_script(int orientation_id) const;
// Accumulate scores with given OSResults instance and update the best script.
void accumulate(const OSResults& osr);
// Print statistics.
void print_scores(void) const;
void print_scores(int orientation_id) const;
// Array holding scores for each orientation id [0,3].
// Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
// page respectively, where the values refer to the amount of clockwise
// rotation to be applied to the page for the text to be upright and readable.
float orientations[4];
// Script confidence scores for each of 4 possible orientations.
float scripts_na[4][kMaxNumberOfScripts];
UNICHARSET* unicharset;
OSBestResult best_result;
};
class OrientationDetector {
public:
OrientationDetector(const GenericVector<int>* allowed_scripts,
OSResults* results);
bool detect_blob(BLOB_CHOICE_LIST* scores);
int get_orientation();
private:
OSResults* osr_;
const GenericVector<int>* allowed_scripts_;
};
class ScriptDetector {
public:
ScriptDetector(const GenericVector<int>* allowed_scripts,
OSResults* osr, tesseract::Tesseract* tess);
void detect_blob(BLOB_CHOICE_LIST* scores);
bool must_stop(int orientation);
private:
OSResults* osr_;
static const char* korean_script_;
static const char* japanese_script_;
static const char* fraktur_script_;
int korean_id_;
int japanese_id_;
int katakana_id_;
int hiragana_id_;
int han_id_;
int hangul_id_;
int latin_id_;
int fraktur_id_;
tesseract::Tesseract* tess_;
const GenericVector<int>* allowed_scripts_;
};
int orientation_and_script_detection(STRING& filename,
OSResults*,
tesseract::Tesseract*);
int os_detect(TO_BLOCK_LIST* port_blocks,
OSResults* osr,
tesseract::Tesseract* tess);
int os_detect_blobs(const GenericVector<int>* allowed_scripts,
BLOBNBOX_CLIST* blob_list,
OSResults* osr,
tesseract::Tesseract* tess);
bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
ScriptDetector* s, OSResults*,
tesseract::Tesseract* tess);
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be
// applied for the text to be upright (readable).
TESS_API int OrientationIdToValue(const int& id);
#endif // TESSERACT_CCMAIN_OSDETECT_H__
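// Illustrative usage sketch (not part of the original header): combining the
// declarations above to report the detected page orientation. The helper name
// is hypothetical; "osd_tess" is assumed to be a Tesseract instance
// initialized with OSD-capable traineddata, and the return value of
// orientation_and_script_detection is ignored here for brevity.
#if 0
#include "osdetect.h"
#include "tprintf.h"
void ReportOrientation(tesseract::Tesseract* osd_tess, STRING& filename) {
  OSResults osr;
  orientation_and_script_detection(filename, &osr, osd_tess);
  const OSBestResult& best = osr.best_result;
  // orientation_id in [0..3] maps to a clockwise rotation of 0/270/180/90
  // degrees needed to make the text upright.
  tprintf("Rotate clockwise by %d degrees (confidence %.2f)\n",
          OrientationIdToValue(best.orientation_id), best.oconfidence);
}
#endif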

View File

@ -0,0 +1,450 @@
/******************************************************************
* File: output.cpp (Formerly output.c)
* Description: Output pass
* Author: Phil Cheatle
* Created: Thu Aug 4 10:56:08 BST 1994
*
* (C) Copyright 1994, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifdef _MSC_VER
#pragma warning(disable:4244) // Conversion warnings
#endif
#include <string.h>
#include <ctype.h>
#ifdef __UNIX__
#include <assert.h>
#include <unistd.h>
#include <errno.h>
#endif
#include "helpers.h"
#include "tessvars.h"
#include "control.h"
#include "reject.h"
#include "docqual.h"
#include "output.h"
#include "globals.h"
#include "tesseractclass.h"
#define EPAPER_EXT ".ep"
#define PAGE_YSIZE 3508
#define CTRL_INSET '\024' //dc4=text inset
#define CTRL_FONT '\016' //so=font change
#define CTRL_DEFAULT '\017' //si=default font
#define CTRL_SHIFT '\022' //dc2=x shift
#define CTRL_TAB '\011' //tab
#define CTRL_NEWLINE '\012' //newline
#define CTRL_HARDLINE '\015' //cr
/**********************************************************************
* pixels_to_pts
*
* Convert an integer number of pixels to the nearest integer
* number of points.
**********************************************************************/
inT32 pixels_to_pts( //convert coords
inT32 pixels,
inT32 pix_res //resolution
) {
float pts; //converted value
pts = pixels * 72.0 / pix_res;
return (inT32)(pts + 0.5); //round it
}
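// Worked example (illustrative): at a resolution of 300 pixels per inch,
// 600 pixels convert to 600 * 72.0 / 300 = 144.0, which rounds to 144 pts.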
namespace tesseract {
void Tesseract::output_pass( //Tess output pass //send to api
PAGE_RES_IT &page_res_it,
const TBOX *target_word_box) {
BLOCK_RES *block_of_last_word;
BOOL8 force_eol; //During output
BLOCK *nextblock; //block of next word
WERD *nextword; //next word
page_res_it.restart_page();
block_of_last_word = NULL;
while (page_res_it.word() != NULL) {
check_debug_pt(page_res_it.word(), 120);
if (target_word_box) {
TBOX current_word_box = page_res_it.word()->word->bounding_box();
FCOORD center_pt(
(current_word_box.right() + current_word_box.left()) / 2,
(current_word_box.bottom() + current_word_box.top()) / 2);
if (!target_word_box->contains(center_pt)) {
page_res_it.forward();
continue;
}
}
if (tessedit_write_block_separators &&
block_of_last_word != page_res_it.block()) {
block_of_last_word = page_res_it.block();
}
force_eol = (tessedit_write_block_separators &&
(page_res_it.block() != page_res_it.next_block())) ||
(page_res_it.next_word() == NULL);
if (page_res_it.next_word() != NULL)
nextword = page_res_it.next_word()->word;
else
nextword = NULL;
if (page_res_it.next_block() != NULL)
nextblock = page_res_it.next_block()->block;
else
nextblock = NULL;
//regardless of tilde crunching
write_results(page_res_it,
determine_newline_type(page_res_it.word()->word,
page_res_it.block()->block,
nextword, nextblock), force_eol);
page_res_it.forward();
}
}
/*************************************************************************
* write_results()
*
* All recognition and rejection has now been done. Generate the following:
* .txt file - giving the final best choices with NO highlighting
* .raw file - giving the tesseract top choice output for each word
* .map file - showing how the .txt file has been rejected in the .ep file
* epchoice list - a list of one element per word, containing the text for the
* epaper. Reject strings are inserted.
* inset list - a list of bounding boxes of reject insets - indexed by the
* reject strings in the epchoice text.
*************************************************************************/
void Tesseract::write_results(PAGE_RES_IT &page_res_it,
char newline_type, // type of newline
BOOL8 force_eol) { // override tilde crunch?
WERD_RES *word = page_res_it.word();
const UNICHARSET &uchset = *word->uch_set;
int i;
BOOL8 need_reject = FALSE;
UNICHAR_ID space = uchset.unichar_to_id(" ");
if ((word->unlv_crunch_mode != CR_NONE ||
word->best_choice->length() == 0) &&
!tessedit_zero_kelvin_rejection && !tessedit_word_for_word) {
if ((word->unlv_crunch_mode != CR_DELETE) &&
(!stats_.tilde_crunch_written ||
((word->unlv_crunch_mode == CR_KEEP_SPACE) &&
(word->word->space() > 0) &&
!word->word->flag(W_FUZZY_NON) &&
!word->word->flag(W_FUZZY_SP)))) {
if (!word->word->flag(W_BOL) &&
(word->word->space() > 0) &&
!word->word->flag(W_FUZZY_NON) &&
!word->word->flag(W_FUZZY_SP)) {
stats_.last_char_was_tilde = false;
}
need_reject = TRUE;
}
if ((need_reject && !stats_.last_char_was_tilde) ||
(force_eol && stats_.write_results_empty_block)) {
/* Write a reject char - mark as rejected unless zero_rejection mode */
stats_.last_char_was_tilde = TRUE;
stats_.tilde_crunch_written = true;
stats_.last_char_was_newline = false;
stats_.write_results_empty_block = false;
}
if ((word->word->flag(W_EOL) && !stats_.last_char_was_newline) || force_eol) {
stats_.tilde_crunch_written = false;
stats_.last_char_was_newline = true;
stats_.last_char_was_tilde = false;
}
if (force_eol)
stats_.write_results_empty_block = true;
return;
}
/* NORMAL PROCESSING of non tilde crunched words */
stats_.tilde_crunch_written = false;
if (newline_type)
stats_.last_char_was_newline = true;
else
stats_.last_char_was_newline = false;
stats_.write_results_empty_block = force_eol; // about to write a real word
if (unlv_tilde_crunching &&
stats_.last_char_was_tilde &&
(word->word->space() == 0) &&
!(word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes) &&
(word->best_choice->unichar_id(0) == space)) {
/* Prevent adjacent tilde across words - we know that adjacent tildes within
words have been removed */
word->MergeAdjacentBlobs(0);
}
if (newline_type ||
(word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes))
stats_.last_char_was_tilde = false;
else {
if (word->reject_map.length() > 0) {
if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space)
stats_.last_char_was_tilde = true;
else
stats_.last_char_was_tilde = false;
}
else if (word->word->space() > 0)
stats_.last_char_was_tilde = false;
/* else it is unchanged as there are no output chars */
}
ASSERT_HOST(word->best_choice->length() == word->reject_map.length());
set_unlv_suspects(word);
check_debug_pt(word, 120);
if (tessedit_rejection_debug) {
tprintf("Dict word: \"%s\": %d\n",
word->best_choice->debug_string().string(),
dict_word(*(word->best_choice)));
}
if (!word->word->flag(W_REP_CHAR) || !tessedit_write_rep_codes) {
if (tessedit_zero_rejection) {
/* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
for (i = 0; i < word->best_choice->length(); ++i) {
if (word->reject_map[i].rejected())
word->reject_map[i].setrej_minimal_rej_accept();
}
}
if (tessedit_minimal_rejection) {
/* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
for (i = 0; i < word->best_choice->length(); ++i) {
if ((word->best_choice->unichar_id(i) != space) &&
word->reject_map[i].rejected())
word->reject_map[i].setrej_minimal_rej_accept();
}
}
}
}
} // namespace tesseract
/**********************************************************************
* determine_newline_type
*
* Find whether we have a wrapping or hard newline.
* Return FALSE if not at end of line.
**********************************************************************/
char determine_newline_type( //test line ends
WERD *word, //word to do
BLOCK *block, //current block
WERD *next_word, //next word
BLOCK *next_block //block of next word
) {
inT16 end_gap; //to right edge
inT16 width; //of next word
TBOX word_box; //bounding
TBOX next_box; //next word
TBOX block_box; //block bounding
if (!word->flag(W_EOL))
return FALSE; //not end of line
if (next_word == NULL || next_block == NULL || block != next_block)
return CTRL_NEWLINE;
if (next_word->space() > 0)
return CTRL_HARDLINE; //it is tabbed
word_box = word->bounding_box();
next_box = next_word->bounding_box();
block_box = block->bounding_box();
//gap to eol
end_gap = block_box.right() - word_box.right();
end_gap -= (inT32)block->space();
width = next_box.right() - next_box.left();
// tprintf("end_gap=%d-%d=%d, width=%d-%d=%d, nl=%d\n",
// block_box.right(),word_box.right(),end_gap,
// next_box.right(),next_box.left(),width,
// end_gap>width ? CTRL_HARDLINE : CTRL_NEWLINE);
return end_gap > width ? CTRL_HARDLINE : CTRL_NEWLINE;
}
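// Worked example (illustrative), for a word flagged W_EOL whose successor is
// in the same block and not tabbed: with a block whose right edge is at
// x=2000 and an inter-word space of 20, a line-final word ending at x=1500
// leaves end_gap = 2000 - 1500 - 20 = 480. If the next word is 300 pixels
// wide it would have fitted on this line, so the break is reported as
// CTRL_HARDLINE; had the next word been 600 pixels wide, the break would be
// CTRL_NEWLINE.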
/*************************************************************************
* get_rep_char()
* Return the first accepted character from the repetition string. This is the
* character which is repeated - as determined earlier by fix_rep_char()
*************************************************************************/
namespace tesseract {
UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated?
int i;
for (i = 0; ((i < word->reject_map.length()) &&
(word->reject_map[i].rejected())); ++i);
if (i < word->reject_map.length()) {
return word->best_choice->unichar_id(i);
}
else {
return word->uch_set->unichar_to_id(unrecognised_char.string());
}
}
/*************************************************************************
* SUSPECT LEVELS
*
* 0 - don't reject ANYTHING
* 1,2 - partial rejection
* 3 - BEST
*
* NOTE: to reject JUST tess failures in the .map file set suspect_level 3 and
* tessedit_minimal_rejection.
*************************************************************************/
void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
int len = word_res->reject_map.length();
const WERD_CHOICE &word = *(word_res->best_choice);
const UNICHARSET &uchset = *word.unicharset();
int i;
float rating_per_ch;
if (suspect_level == 0) {
for (i = 0; i < len; i++) {
if (word_res->reject_map[i].rejected())
word_res->reject_map[i].setrej_minimal_rej_accept();
}
return;
}
if (suspect_level >= 3)
return; //Use defaults
/* NOW FOR LEVELS 1 and 2 Find some stuff to unreject*/
if (safe_dict_word(word_res) &&
(count_alphas(word) > suspect_short_words)) {
/* Unreject alphas in dictionary words */
for (i = 0; i < len; ++i) {
if (word_res->reject_map[i].rejected() &&
uchset.get_isalpha(word.unichar_id(i)))
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
rating_per_ch = word.rating() / word_res->reject_map.length();
if (rating_per_ch >= suspect_rating_per_ch)
return; // Don't touch bad ratings
if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
/* Unreject any Tess Acceptable word - but NOT tess reject chs*/
for (i = 0; i < len; ++i) {
if (word_res->reject_map[i].rejected() &&
(!uchset.eq(word.unichar_id(i), " ")))
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
for (i = 0; i < len; i++) {
if (word_res->reject_map[i].rejected()) {
if (word_res->reject_map[i].flag(R_DOC_REJ))
word_res->reject_map[i].setrej_minimal_rej_accept();
if (word_res->reject_map[i].flag(R_BLOCK_REJ))
word_res->reject_map[i].setrej_minimal_rej_accept();
if (word_res->reject_map[i].flag(R_ROW_REJ))
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
if (suspect_level == 2)
return;
if (!suspect_constrain_1Il ||
(word_res->reject_map.length() <= suspect_short_words)) {
for (i = 0; i < len; i++) {
if (word_res->reject_map[i].rejected()) {
if ((word_res->reject_map[i].flag(R_1IL_CONFLICT) ||
word_res->reject_map[i].flag(R_POSTNN_1IL)))
word_res->reject_map[i].setrej_minimal_rej_accept();
if (!suspect_constrain_1Il &&
word_res->reject_map[i].flag(R_MM_REJECT))
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
}
if (acceptable_word_string(*word_res->uch_set,
word.unichar_string().string(),
word.unichar_lengths().string()) !=
AC_UNACCEPTABLE ||
acceptable_number_string(word.unichar_string().string(),
word.unichar_lengths().string())) {
if (word_res->reject_map.length() > suspect_short_words) {
for (i = 0; i < len; i++) {
if (word_res->reject_map[i].rejected() &&
(!word_res->reject_map[i].perm_rejected() ||
word_res->reject_map[i].flag(R_1IL_CONFLICT) ||
word_res->reject_map[i].flag(R_POSTNN_1IL) ||
word_res->reject_map[i].flag(R_MM_REJECT))) {
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
}
}
}
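// Illustrative sketch (an assumption, not part of this file): the SUSPECT
// LEVELS note above can be acted on from the public API by setting the two
// parameters it mentions. The helper name is hypothetical; parameter names
// follow the member variables, and "api" is assumed to be an initialized
// TessBaseAPI.
#if 0
#include "baseapi.h"
void RejectOnlyTessFailures(tesseract::TessBaseAPI* api) {
  api->SetVariable("suspect_level", "3");
  api->SetVariable("tessedit_minimal_rejection", "T");
}
#endif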
inT16 Tesseract::count_alphas(const WERD_CHOICE &word) {
int count = 0;
for (int i = 0; i < word.length(); ++i) {
if (word.unicharset()->get_isalpha(word.unichar_id(i)))
count++;
}
return count;
}
inT16 Tesseract::count_alphanums(const WERD_CHOICE &word) {
int count = 0;
for (int i = 0; i < word.length(); ++i) {
if (word.unicharset()->get_isalpha(word.unichar_id(i)) ||
word.unicharset()->get_isdigit(word.unichar_id(i)))
count++;
}
return count;
}
BOOL8 Tesseract::acceptable_number_string(const char *s,
const char *lengths) {
BOOL8 prev_digit = FALSE;
if (*lengths == 1 && *s == '(')
s++;
if (*lengths == 1 &&
((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-')))
s++;
for (; *s != '\0'; s += *(lengths++)) {
if (unicharset.get_isdigit(s, *lengths))
prev_digit = TRUE;
else if (prev_digit &&
(*lengths == 1 && ((*s == '.') || (*s == ',') || (*s == '-'))))
prev_digit = FALSE;
else if (prev_digit && *lengths == 1 &&
(*(s + *lengths) == '\0') && ((*s == '%') || (*s == ')')))
return TRUE;
else if (prev_digit &&
*lengths == 1 && (*s == '%') &&
(*(lengths + 1) == 1 && *(s + *lengths) == ')') &&
(*(s + *lengths + *(lengths + 1)) == '\0'))
return TRUE;
else
return FALSE;
}
return TRUE;
}
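// Illustrative examples (assuming single-byte characters, so every entry in
// lengths is 1): "$1,234.56", "(99%)" and "7.5%" are accepted, while "abc"
// and "1a" are rejected as soon as a character other than a digit or the
// allowed punctuation is met.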
} // namespace tesseract

View File

@ -0,0 +1,33 @@
/******************************************************************
* File: output.h (Formerly output.h)
* Description: Output pass
* Author: Phil Cheatle
* Created: Thu Aug 4 10:56:08 BST 1994
*
* (C) Copyright 1994, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef OUTPUT_H
#define OUTPUT_H
#include "params.h"
//#include "epapconv.h"
#include "pageres.h"
/** test line ends */
char determine_newline_type(WERD *word, ///< word to do
BLOCK *block, ///< current block
WERD *next_word, ///< next word
BLOCK *next_block ///< block of next word
);
#endif

View File

@ -0,0 +1,631 @@
///////////////////////////////////////////////////////////////////////
// File: pageiterator.cpp
// Description: Iterator for tesseract page structure that avoids using
// tesseract internal data structures.
// Author: Ray Smith
// Created: Fri Feb 26 14:32:09 PST 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "pageiterator.h"
#include "allheaders.h"
#include "helpers.h"
#include "pageres.h"
#include "tesseractclass.h"
namespace tesseract {
PageIterator::PageIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale,
int scaled_yres, int rect_left, int rect_top,
int rect_width, int rect_height)
: page_res_(page_res),
tesseract_(tesseract),
word_(NULL),
word_length_(0),
blob_index_(0),
cblob_it_(NULL),
include_upper_dots_(false),
include_lower_dots_(false),
scale_(scale),
scaled_yres_(scaled_yres),
rect_left_(rect_left),
rect_top_(rect_top),
rect_width_(rect_width),
rect_height_(rect_height) {
it_ = new PAGE_RES_IT(page_res);
PageIterator::Begin();
}
PageIterator::~PageIterator() {
delete it_;
delete cblob_it_;
}
/**
* PageIterators may be copied! This makes it possible to iterate over
* all the objects at a lower level, while maintaining an iterator to
* objects at a higher level.
*/
PageIterator::PageIterator(const PageIterator& src)
: page_res_(src.page_res_),
tesseract_(src.tesseract_),
word_(NULL),
word_length_(src.word_length_),
blob_index_(src.blob_index_),
cblob_it_(NULL),
include_upper_dots_(src.include_upper_dots_),
include_lower_dots_(src.include_lower_dots_),
scale_(src.scale_),
scaled_yres_(src.scaled_yres_),
rect_left_(src.rect_left_),
rect_top_(src.rect_top_),
rect_width_(src.rect_width_),
rect_height_(src.rect_height_) {
it_ = new PAGE_RES_IT(*src.it_);
BeginWord(src.blob_index_);
}
const PageIterator& PageIterator::operator=(const PageIterator& src) {
page_res_ = src.page_res_;
tesseract_ = src.tesseract_;
include_upper_dots_ = src.include_upper_dots_;
include_lower_dots_ = src.include_lower_dots_;
scale_ = src.scale_;
scaled_yres_ = src.scaled_yres_;
rect_left_ = src.rect_left_;
rect_top_ = src.rect_top_;
rect_width_ = src.rect_width_;
rect_height_ = src.rect_height_;
delete it_;
it_ = new PAGE_RES_IT(*src.it_);
BeginWord(src.blob_index_);
return *this;
}
bool PageIterator::PositionedAtSameWord(const PAGE_RES_IT* other) const {
return (it_ == NULL && it_ == other) ||
((other != NULL) && (it_ != NULL) && (*it_ == *other));
}
// ============= Moving around within the page ============.
/** Resets the iterator to point to the start of the page. */
void PageIterator::Begin() {
it_->restart_page_with_empties();
BeginWord(0);
}
void PageIterator::RestartParagraph() {
if (it_->block() == NULL) return; // At end of the document.
PAGE_RES_IT para(page_res_);
PAGE_RES_IT next_para(para);
next_para.forward_paragraph();
while (next_para.cmp(*it_) <= 0) {
para = next_para;
next_para.forward_paragraph();
}
*it_ = para;
BeginWord(0);
}
bool PageIterator::IsWithinFirstTextlineOfParagraph() const {
PageIterator p_start(*this);
p_start.RestartParagraph();
return p_start.it_->row() == it_->row();
}
void PageIterator::RestartRow() {
it_->restart_row();
BeginWord(0);
}
/**
* Moves to the start of the next object at the given level in the
* page hierarchy, and returns false if the end of the page was reached.
* NOTE (CHANGED!) that ALL PageIteratorLevel level values will visit each
* non-text block at least once.
* Think of non text blocks as containing a single para, with at least one
* line, with a single imaginary word, containing a single symbol.
* The bounding boxes mark out any polygonal nature of the block, and
* PTIsTextType(BLockType()) is false for non-text blocks.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
bool PageIterator::Next(PageIteratorLevel level) {
if (it_->block() == NULL) return false; // Already at the end!
if (it_->word() == NULL)
level = RIL_BLOCK;
switch (level) {
case RIL_BLOCK:
it_->forward_block();
break;
case RIL_PARA:
it_->forward_paragraph();
break;
case RIL_TEXTLINE:
for (it_->forward_with_empties(); it_->row() == it_->prev_row();
it_->forward_with_empties());
break;
case RIL_WORD:
it_->forward_with_empties();
break;
case RIL_SYMBOL:
if (cblob_it_ != NULL)
cblob_it_->forward();
++blob_index_;
if (blob_index_ >= word_length_)
it_->forward_with_empties();
else
return true;
break;
}
BeginWord(0);
return it_->block() != NULL;
}
/**
* Returns true if the iterator is at the start of an object at the given
* level. Possible uses include determining if a call to Next(RIL_WORD)
* moved to the start of a RIL_PARA.
*/
bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const {
if (it_->block() == NULL) return false; // Already at the end!
if (it_->word() == NULL) return true; // In an image block.
switch (level) {
case RIL_BLOCK:
return blob_index_ == 0 && it_->block() != it_->prev_block();
case RIL_PARA:
return blob_index_ == 0 &&
(it_->block() != it_->prev_block() ||
it_->row()->row->para() != it_->prev_row()->row->para());
case RIL_TEXTLINE:
return blob_index_ == 0 && it_->row() != it_->prev_row();
case RIL_WORD:
return blob_index_ == 0;
case RIL_SYMBOL:
return true;
}
return false;
}
/**
* Returns whether the iterator is positioned at the last element in a
* given level. (e.g. the last word in a line, the last line in a block)
*/
bool PageIterator::IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const {
if (Empty(element)) return true; // Already at the end!
// The result is true if we step forward by element and find we are
// at the end of the page or at the beginning of *all* levels in:
// [level, element).
// When there is more than one level difference between element and level,
// we could for instance move forward one symbol and still be at the first
// word on a line, so we also have to be at the first symbol in a word.
PageIterator next(*this);
next.Next(element);
if (next.Empty(element)) return true; // Reached the end of the page.
while (element > level) {
element = static_cast<PageIteratorLevel>(element - 1);
if (!next.IsAtBeginningOf(element))
return false;
}
return true;
}
/**
* Returns whether this iterator is positioned
* before other: -1
* equal to other: 0
* after other: 1
*/
int PageIterator::Cmp(const PageIterator &other) const {
int word_cmp = it_->cmp(*other.it_);
if (word_cmp != 0)
return word_cmp;
if (blob_index_ < other.blob_index_)
return -1;
if (blob_index_ == other.blob_index_)
return 0;
return 1;
}
// ============= Accessing data ==============.
// Coordinate system:
// Integer coordinates are at the cracks between the pixels.
// The top-left corner of the top-left pixel in the image is at (0,0).
// The bottom-right corner of the bottom-right pixel in the image is at
// (width, height).
// Every bounding box goes from the top-left of the top-left contained
// pixel to the bottom-right of the bottom-right contained pixel, so
// the bounding box of the single top-left pixel in the image is:
// (0,0)->(1,1).
// If an image rectangle has been set in the API, then returned coordinates
// relate to the original (full) image, rather than the rectangle.
/**
* Returns the bounding rectangle of the current object at the given level in
* the coordinates of the working image that is pix_binary().
* See comment on coordinate system above.
* Returns false if there is no such object at the current position.
*/
bool PageIterator::BoundingBoxInternal(PageIteratorLevel level,
int* left, int* top,
int* right, int* bottom) const {
if (Empty(level))
return false;
TBOX box;
PARA *para = NULL;
switch (level) {
case RIL_BLOCK:
box = it_->block()->block->restricted_bounding_box(include_upper_dots_,
include_lower_dots_);
break;
case RIL_PARA:
para = it_->row()->row->para();
// explicit fall-through.
case RIL_TEXTLINE:
box = it_->row()->row->restricted_bounding_box(include_upper_dots_,
include_lower_dots_);
break;
case RIL_WORD:
box = it_->word()->word->restricted_bounding_box(include_upper_dots_,
include_lower_dots_);
break;
case RIL_SYMBOL:
if (cblob_it_ == NULL)
box = it_->word()->box_word->BlobBox(blob_index_);
else
box = cblob_it_->data()->bounding_box();
}
if (level == RIL_PARA) {
PageIterator other = *this;
other.Begin();
do {
if (other.it_->block() &&
other.it_->block()->block == it_->block()->block &&
other.it_->row() && other.it_->row()->row &&
other.it_->row()->row->para() == para) {
box = box.bounding_union(other.it_->row()->row->bounding_box());
}
} while (other.Next(RIL_TEXTLINE));
}
if (level != RIL_SYMBOL || cblob_it_ != NULL)
box.rotate(it_->block()->block->re_rotation());
// Now that we have a box in tesseract coordinates relative to the image rectangle,
// we have to convert the coords to a top-down system.
const int pix_height = pixGetHeight(tesseract_->pix_binary());
const int pix_width = pixGetWidth(tesseract_->pix_binary());
*left = ClipToRange(static_cast<int>(box.left()), 0, pix_width);
*top = ClipToRange(pix_height - box.top(), 0, pix_height);
*right = ClipToRange(static_cast<int>(box.right()), *left, pix_width);
*bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height);
return true;
}
/**
* Returns the bounding rectangle of the current object at the given level in
* coordinates of the original image.
* See comment on coordinate system above.
* Returns false if there is no such object at the current position.
*/
bool PageIterator::BoundingBox(PageIteratorLevel level,
int* left, int* top,
int* right, int* bottom) const {
return BoundingBox(level, 0, left, top, right, bottom);
}
bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding,
int* left, int* top,
int* right, int* bottom) const {
if (!BoundingBoxInternal(level, left, top, right, bottom))
return false;
// Convert to the coordinate system of the original image.
*left = ClipToRange(*left / scale_ + rect_left_ - padding,
rect_left_, rect_left_ + rect_width_);
*top = ClipToRange(*top / scale_ + rect_top_ - padding,
rect_top_, rect_top_ + rect_height_);
*right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding,
*left, rect_left_ + rect_width_);
*bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding,
*top, rect_top_ + rect_height_);
return true;
}
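// Worked example (illustrative): with scale_ = 2, rect_left_ = 100,
// rect_top_ = 50 and padding = 0, an internal box with left = 60 and
// top = 30 maps to left = 60 / 2 + 100 = 130 and top = 30 / 2 + 50 = 65 in
// original-image coordinates; right and bottom are rounded up before the same
// shift, so an internal right = 61 becomes (61 + 2 - 1) / 2 + 100 = 131.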
/** Return that there is no such object at a given level. */
bool PageIterator::Empty(PageIteratorLevel level) const {
if (it_->block() == NULL) return true; // Already at the end!
if (it_->word() == NULL && level != RIL_BLOCK) return true; // image block
if (level == RIL_SYMBOL && blob_index_ >= word_length_)
return true; // Zero length word, or already at the end of it.
return false;
}
/** Returns the type of the current block. See apitypes.h for PolyBlockType. */
PolyBlockType PageIterator::BlockType() const {
if (it_->block() == NULL || it_->block()->block == NULL)
return PT_UNKNOWN; // Already at the end!
if (it_->block()->block->poly_block() == NULL)
return PT_FLOWING_TEXT; // No layout analysis used - assume text.
return it_->block()->block->poly_block()->isA();
}
/** Returns the polygon outline of the current block. The returned Pta must
* be ptaDestroy-ed after use. */
Pta* PageIterator::BlockPolygon() const {
if (it_->block() == NULL || it_->block()->block == NULL)
return NULL; // Already at the end!
if (it_->block()->block->poly_block() == NULL)
return NULL; // No layout analysis used - no polygon.
ICOORDELT_IT it(it_->block()->block->poly_block()->points());
Pta* pta = ptaCreate(it.length());
int num_pts = 0;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++num_pts) {
ICOORD* pt = it.data();
// Convert to top-down coords within the input image.
float x = static_cast<float>(pt->x()) / scale_ + rect_left_;
float y = rect_top_ + rect_height_ - static_cast<float>(pt->y()) / scale_;
ptaAddPt(pta, x, y);
}
return pta;
}
/**
* Returns a binary image of the current object at the given level.
* The position and size match the return from BoundingBoxInternal, and so this
* could be upscaled with respect to the original input image.
* Use pixDestroy to delete the image after use.
* The following methods are used to generate the images:
* RIL_BLOCK: mask the page image with the block polygon.
* RIL_TEXTLINE: Clip the rectangle of the line box from the page image.
* TODO(rays) fix this to generate and use a line polygon.
* RIL_WORD: Clip the rectangle of the word box from the page image.
* RIL_SYMBOL: Render the symbol outline to an image for cblobs (prior
* to recognition) or the bounding box otherwise.
* A reconstruction of the original image (using xor to check for double
* representation) should be reasonably accurate,
* apart from removed noise, at the block level. Below the block level, the
* reconstruction will be missing images and line separators.
* At the symbol level, kerned characters will invade the bounding box
* if rendered after recognition, making an xor reconstruction inaccurate, but
* an or reconstruction better. Before recognition, symbol-level reconstruction
* should be good, even with xor, since the images come from the connected
* components.
*/
Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const {
int left, top, right, bottom;
if (!BoundingBoxInternal(level, &left, &top, &right, &bottom))
return NULL;
if (level == RIL_SYMBOL && cblob_it_ != NULL &&
cblob_it_->data()->area() != 0)
return cblob_it_->data()->render();
Box* box = boxCreate(left, top, right - left, bottom - top);
Pix* pix = pixClipRectangle(tesseract_->pix_binary(), box, NULL);
boxDestroy(&box);
if (level == RIL_BLOCK || level == RIL_PARA) {
// Clip to the block polygon as well.
TBOX mask_box;
Pix* mask = it_->block()->block->render_mask(&mask_box);
int mask_x = left - mask_box.left();
int mask_y = top - (tesseract_->ImageHeight() - mask_box.top());
// AND the mask and pix, putting the result in pix.
pixRasterop(pix, MAX(0, -mask_x), MAX(0, -mask_y), pixGetWidth(pix),
pixGetHeight(pix), PIX_SRC & PIX_DST, mask, MAX(0, mask_x),
MAX(0, mask_y));
pixDestroy(&mask);
}
return pix;
}
/**
* Returns an image of the current object at the given level in greyscale
* if available in the input. To guarantee a binary image use BinaryImage.
* NOTE that in order to give the best possible image, the bounds are
* expanded slightly over the binary connected component, by the supplied
* padding, so the top-left position of the returned image is returned
* in (left,top). These will most likely not match the coordinates
* returned by BoundingBox.
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
Pix* PageIterator::GetImage(PageIteratorLevel level, int padding,
Pix* original_img,
int* left, int* top) const {
int right, bottom;
if (!BoundingBox(level, left, top, &right, &bottom))
return NULL;
if (original_img == NULL)
return GetBinaryImage(level);
// Expand the box.
*left = MAX(*left - padding, 0);
*top = MAX(*top - padding, 0);
right = MIN(right + padding, rect_width_);
bottom = MIN(bottom + padding, rect_height_);
Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
Pix* grey_pix = pixClipRectangle(original_img, box, NULL);
boxDestroy(&box);
if (level == RIL_BLOCK || level == RIL_PARA) {
// Clip to the block polygon as well.
TBOX mask_box;
Pix* mask = it_->block()->block->render_mask(&mask_box);
// Copy the mask registered correctly into an image the size of grey_pix.
int mask_x = *left - mask_box.left();
int mask_y = *top - (pixGetHeight(original_img) - mask_box.top());
int width = pixGetWidth(grey_pix);
int height = pixGetHeight(grey_pix);
Pix* resized_mask = pixCreate(width, height, 1);
pixRasterop(resized_mask, MAX(0, -mask_x), MAX(0, -mask_y), width, height,
PIX_SRC, mask, MAX(0, mask_x), MAX(0, mask_y));
pixDestroy(&mask);
pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1,
2 * padding + 1);
pixInvert(resized_mask, resized_mask);
pixSetMasked(grey_pix, resized_mask, MAX_UINT32);
pixDestroy(&resized_mask);
}
return grey_pix;
}
/**
* Returns the baseline of the current object at the given level.
* The baseline is the line that passes through (x1, y1) and (x2, y2).
* WARNING: with vertical text, baselines may be vertical!
*/
bool PageIterator::Baseline(PageIteratorLevel level,
int* x1, int* y1, int* x2, int* y2) const {
if (it_->word() == NULL) return false; // Already at the end!
ROW* row = it_->row()->row;
WERD* word = it_->word()->word;
TBOX box = (level == RIL_WORD || level == RIL_SYMBOL)
? word->bounding_box()
: row->bounding_box();
int left = box.left();
ICOORD startpt(left, static_cast<inT16>(row->base_line(left) + 0.5));
int right = box.right();
ICOORD endpt(right, static_cast<inT16>(row->base_line(right) + 0.5));
// Rotate to image coordinates and convert to global image coords.
startpt.rotate(it_->block()->block->re_rotation());
endpt.rotate(it_->block()->block->re_rotation());
*x1 = startpt.x() / scale_ + rect_left_;
*y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_;
*x2 = endpt.x() / scale_ + rect_left_;
*y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_;
return true;
}
void PageIterator::Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order,
float *deskew_angle) const {
BLOCK* block = it_->block()->block;
// Orientation
FCOORD up_in_image(0.0, 1.0);
up_in_image.unrotate(block->classify_rotation());
up_in_image.rotate(block->re_rotation());
if (up_in_image.x() == 0.0F) {
if (up_in_image.y() > 0.0F) {
*orientation = ORIENTATION_PAGE_UP;
}
else {
*orientation = ORIENTATION_PAGE_DOWN;
}
}
else if (up_in_image.x() > 0.0F) {
*orientation = ORIENTATION_PAGE_RIGHT;
}
else {
*orientation = ORIENTATION_PAGE_LEFT;
}
// NOTE: this early return leaves the writing direction, textline order and
// deskew angle below uncomputed.
return;
// Writing direction
bool is_vertical_text = (block->classify_rotation().x() == 0.0);
bool right_to_left = block->right_to_left();
*writing_direction =
is_vertical_text
? WRITING_DIRECTION_TOP_TO_BOTTOM
: (right_to_left
? WRITING_DIRECTION_RIGHT_TO_LEFT
: WRITING_DIRECTION_LEFT_TO_RIGHT);
// Textline Order
bool is_mongolian = false; // TODO(eger): fix me
*textline_order = is_vertical_text
? (is_mongolian
? TEXTLINE_ORDER_LEFT_TO_RIGHT
: TEXTLINE_ORDER_RIGHT_TO_LEFT)
: TEXTLINE_ORDER_TOP_TO_BOTTOM;
// Deskew angle
FCOORD skew = block->skew(); // true horizontal for textlines
*deskew_angle = -skew.angle();
}
void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just,
bool *is_list_item,
bool *is_crown,
int *first_line_indent) const {
*just = tesseract::JUSTIFICATION_UNKNOWN;
if (!it_->row() || !it_->row()->row || !it_->row()->row->para() ||
!it_->row()->row->para()->model)
return;
PARA *para = it_->row()->row->para();
*is_list_item = para->is_list_item;
*is_crown = para->is_very_first_or_continuation;
*first_line_indent = para->model->first_indent() -
para->model->body_indent();
*just = para->model->justification();
}
/**
* Sets up the internal data for iterating the blobs of a new word, then
* moves the iterator to the given offset.
*/
void PageIterator::BeginWord(int offset) {
WERD_RES* word_res = it_->word();
if (word_res == NULL) {
// This is a non-text block, so there is no word.
word_length_ = 0;
blob_index_ = 0;
word_ = NULL;
return;
}
if (word_res->best_choice != NULL) {
// Recognition has been done, so we are using the box_word, which
// is already baseline denormalized.
word_length_ = word_res->best_choice->length();
if (word_res->box_word != NULL) {
if (word_res->box_word->length() != word_length_) {
tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ",
word_length_, word_res->best_choice->unichar_string().string(),
word_res->box_word->length());
word_res->box_word->bounding_box().print();
}
ASSERT_HOST(word_res->box_word->length() == word_length_);
}
word_ = NULL;
// We will be iterating the box_word.
delete cblob_it_;
cblob_it_ = NULL;
}
else {
// No recognition yet, so a "symbol" is a cblob.
word_ = word_res->word;
ASSERT_HOST(word_->cblob_list() != NULL);
word_length_ = word_->cblob_list()->length();
if (cblob_it_ == NULL) cblob_it_ = new C_BLOB_IT;
cblob_it_->set_to_list(word_->cblob_list());
}
for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) {
if (cblob_it_ != NULL)
cblob_it_->forward();
}
}
bool PageIterator::SetWordBlamerBundle(BlamerBundle *blamer_bundle) {
if (it_->word() != NULL) {
it_->word()->blamer_bundle = blamer_bundle;
return true;
}
else {
return false;
}
}
} // namespace tesseract.

View File

@ -0,0 +1,364 @@
///////////////////////////////////////////////////////////////////////
// File: pageiterator.h
// Description: Iterator for tesseract page structure that avoids using
// tesseract internal data structures.
// Author: Ray Smith
// Created: Fri Feb 26 11:01:06 PST 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H__
#define TESSERACT_CCMAIN_PAGEITERATOR_H__
#include "publictypes.h"
#include "platform.h"
struct BlamerBundle;
class C_BLOB_IT;
class PAGE_RES;
class PAGE_RES_IT;
class WERD;
struct Pix;
struct Pta;
namespace tesseract {
class Tesseract;
/**
* Class to iterate over tesseract page structure, providing access to all
* levels of the page hierarchy, without including any tesseract headers or
* having to handle any tesseract structures.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
* See apitypes.h for the definition of PageIteratorLevel.
* See also ResultIterator, derived from PageIterator, which adds in the
* ability to access OCR output with text-specific methods.
*/
class TESS_API PageIterator {
public:
/**
* page_res and tesseract come directly from the BaseAPI.
* The rectangle parameters are copied indirectly from the Thresholder,
* via the BaseAPI. They represent the coordinates of some rectangle in an
* original image (in top-left-origin coordinates) and therefore the top-left
* needs to be added to any output boxes in order to specify coordinates
* in the original image. See TessBaseAPI::SetRectangle.
* The scale and scaled_yres are in case the Thresholder scaled the image
* rectangle prior to thresholding. Any coordinates in tesseract's image
* must be divided by scale before adding (rect_left, rect_top).
* The scaled_yres indicates the effective resolution of the binary image
* that tesseract has been given by the Thresholder.
* After the constructor, Begin has already been called.
*/
PageIterator(PAGE_RES* page_res, Tesseract* tesseract,
int scale, int scaled_yres,
int rect_left, int rect_top,
int rect_width, int rect_height);
virtual ~PageIterator();
/**
* Page/ResultIterators may be copied! This makes it possible to iterate over
* all the objects at a lower level, while maintaining an iterator to
* objects at a higher level. These constructors DO NOT CALL Begin, so
* iterations will continue from the location of src.
*/
PageIterator(const PageIterator& src);
const PageIterator& operator=(const PageIterator& src);
/** Are we positioned at the same location as other? */
bool PositionedAtSameWord(const PAGE_RES_IT* other) const;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin an
* iteration.
*/
virtual void Begin();
/**
* Moves the iterator to the beginning of the paragraph.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word on the first row of the paragraph.
*/
virtual void RestartParagraph();
/**
* Return whether this iterator points anywhere in the first textline of a
* paragraph.
*/
bool IsWithinFirstTextlineOfParagraph() const;
/**
* Moves the iterator to the beginning of the text line.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word of the row.
*/
virtual void RestartRow();
/**
* Moves to the start of the next object at the given level in the
* page hierarchy, and returns false if the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
virtual bool Next(PageIteratorLevel level);
/**
* Returns true if the iterator is at the start of an object at the given
* level.
*
* For instance, suppose an iterator it is pointed to the first symbol of the
* first word of the third line of the second paragraph of the first block in
* a page, then:
* it.IsAtBeginningOf(RIL_BLOCK) = false
* it.IsAtBeginningOf(RIL_PARA) = false
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
* it.IsAtBeginningOf(RIL_WORD) = true
* it.IsAtBeginningOf(RIL_SYMBOL) = true
*/
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
/**
* Returns whether the iterator is positioned at the last element in a
* given level. (e.g. the last word in a line, the last line in a block)
*
* Here's some two-paragraph example
* text. It starts off innocuously
* enough but quickly turns bizarre.
* The author inserts a cornucopia
* of words to guard against confused
* references.
*
* Now take an iterator it pointed to the start of "bizarre."
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
*/
virtual bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const;
/**
* Returns whether this iterator is positioned
* before other: -1
* equal to other: 0
* after other: 1
*/
int Cmp(const PageIterator &other) const;
// ============= Accessing data ==============.
// Coordinate system:
// Integer coordinates are at the cracks between the pixels.
// The top-left corner of the top-left pixel in the image is at (0,0).
// The bottom-right corner of the bottom-right pixel in the image is at
// (width, height).
// Every bounding box goes from the top-left of the top-left contained
// pixel to the bottom-right of the bottom-right contained pixel, so
// the bounding box of the single top-left pixel in the image is:
// (0,0)->(1,1).
// If an image rectangle has been set in the API, then returned coordinates
// relate to the original (full) image, rather than the rectangle.
/**
* Controls what to include in a bounding box. Bounding boxes of all levels
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
* Between layout analysis and recognition, it isn't known where all
* diacritics belong, so this control is used to include or exclude some
* diacritics that are above or below the main body of the word. In most cases
* where the placement is obvious, and after recognition, it doesn't make as
* much difference, as the diacritics will already be included in the word.
*/
void SetBoundingBoxComponents(bool include_upper_dots,
bool include_lower_dots) {
include_upper_dots_ = include_upper_dots;
include_lower_dots_ = include_lower_dots;
}
/**
* Returns the bounding rectangle of the current object at the given level.
* See comment on coordinate system above.
* Returns false if there is no such object at the current position.
* The returned bounding box is guaranteed to match the size and position
* of the image returned by GetBinaryImage, but may clip foreground pixels
* from a grey image. The padding argument to GetImage can be used to expand
* the image to include more foreground pixels. See GetImage below.
*/
bool BoundingBox(PageIteratorLevel level,
int* left, int* top, int* right, int* bottom) const;
bool BoundingBox(PageIteratorLevel level, const int padding,
int* left, int* top, int* right, int* bottom) const;
/**
* Returns the bounding rectangle of the object in a coordinate system of the
* working image rectangle having its origin at (rect_left_, rect_top_) with
* respect to the original image and is scaled by a factor scale_.
*/
bool BoundingBoxInternal(PageIteratorLevel level,
int* left, int* top, int* right, int* bottom) const;
/** Returns whether there is no object of a given level. */
bool Empty(PageIteratorLevel level) const;
/**
* Returns the type of the current block. See apitypes.h for
* PolyBlockType.
*/
PolyBlockType BlockType() const;
/**
* Returns the polygon outline of the current block. The returned Pta must
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
* of the polygon, and the last edge is the line segment between the last
* point and the first point. NULL will be returned if the iterator is
* at the end of the document or layout analysis was not used.
*/
Pta* BlockPolygon() const;
/**
* Returns a binary image of the current object at the given level.
* The position and size match the return from BoundingBoxInternal, and so
* this could be upscaled with respect to the original input image.
* Use pixDestroy to delete the image after use.
*/
Pix* GetBinaryImage(PageIteratorLevel level) const;
/**
* Returns an image of the current object at the given level in greyscale
* if available in the input. To guarantee a binary image use BinaryImage.
* NOTE that in order to give the best possible image, the bounds are
* expanded slightly over the binary connected component, by the supplied
* padding, so the top-left position of the returned image is returned
* in (left,top). These will most likely not match the coordinates
* returned by BoundingBox.
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
Pix* GetImage(PageIteratorLevel level, int padding, Pix* original_img,
int* left, int* top) const;
/**
* Returns the baseline of the current object at the given level.
* The baseline is the line that passes through (x1, y1) and (x2, y2).
* WARNING: with vertical text, baselines may be vertical!
* Returns false if there is no baseline at the current position.
*/
bool Baseline(PageIteratorLevel level,
int* x1, int* y1, int* x2, int* y2) const;
/**
* Returns orientation for the block the iterator points to.
* orientation, writing_direction, textline_order: see publictypes.h
* deskew_angle: after rotating the block so the text orientation is
* upright, how many radians does one have to rotate the
* block anti-clockwise for it to be level?
* -Pi/4 <= deskew_angle <= Pi/4
*/
void Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order,
float *deskew_angle) const;
/**
* Returns information about the current paragraph, if available.
*
* justification -
* LEFT if ragged right, or fully justified and script is left-to-right.
* RIGHT if ragged left, or fully justified and script is right-to-left.
* unknown if it looks like source code or we have very few lines.
* is_list_item -
* true if we believe this is a member of an ordered or unordered list.
* is_crown -
* true if the first line of the paragraph is aligned with the other
* lines of the paragraph even though subsequent paragraphs have first
* line indents. This typically indicates that this is the continuation
* of a previous paragraph or that it is the very first paragraph in
* the chapter.
* first_line_indent -
* For LEFT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the left edge of the
* rest of the paragraph.
* for RIGHT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the right edge of the
* rest of the paragraph.
* NOTE 1: This value may be negative.
* NOTE 2: if *is_crown == true, the first line of this paragraph is
* actually flush, and first_line_indent is set to the "common"
* first_line_indent for subsequent paragraphs in this block
* of text.
*/
void ParagraphInfo(tesseract::ParagraphJustification *justification,
bool *is_list_item,
bool *is_crown,
int *first_line_indent) const;
// If the current WERD_RES (it_->word()) is not NULL, sets the BlamerBundle
// of the current word to the given pointer (takes ownership of the pointer)
// and returns true.
// Can only be used when iterating on the word level.
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
protected:
/**
* Sets up the internal data for iterating the blobs of a new word, then
* moves the iterator to the given offset.
*/
TESS_LOCAL void BeginWord(int offset);
/** Pointer to the page_res owned by the API. */
PAGE_RES* page_res_;
/** Pointer to the Tesseract object owned by the API. */
Tesseract* tesseract_;
/**
* The iterator to the page_res_. Owned by this ResultIterator.
* A pointer just to avoid dragging in Tesseract includes.
*/
PAGE_RES_IT* it_;
/**
* The current input WERD being iterated. If there is an output from OCR,
* then word_ is NULL. Owned by the API
*/
WERD* word_;
/** The length of the current word_. */
int word_length_;
/** The current blob index within the word. */
int blob_index_;
/**
* Iterator to the blobs within the word. If NULL, then we are iterating
* OCR results in the box_word.
* Owned by this ResultIterator.
*/
C_BLOB_IT* cblob_it_;
/** Control over what to include in bounding boxes. */
bool include_upper_dots_;
bool include_lower_dots_;
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
int scale_;
int scaled_yres_;
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H__
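// Illustrative usage sketch (not part of the original header): walking the
// page layout block by block. The helper name is hypothetical; it assumes a
// tesseract::TessBaseAPI that has been initialized and given an image, and
// that AnalyseLayout() from the public BaseAPI is used to obtain an iterator
// of this class (NULL on failure).
#if 0
#include <cstdio>
#include "baseapi.h"
void DumpBlockBoxes(tesseract::TessBaseAPI* api) {
  tesseract::PageIterator* it = api->AnalyseLayout();  // layout only, no OCR
  if (it == NULL) return;
  do {
    int left, top, right, bottom;
    if (it->BoundingBox(tesseract::RIL_BLOCK, &left, &top, &right, &bottom)) {
      printf("block type %d: (%d,%d)-(%d,%d)\n",
             static_cast<int>(it->BlockType()), left, top, right, bottom);
    }
  } while (it->Next(tesseract::RIL_BLOCK));
  delete it;
}
#endif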

View File

@ -0,0 +1,434 @@
/**********************************************************************
* File: pagesegmain.cpp
* Description: Top-level page segmenter for Tesseract.
* Author: Ray Smith
* Created: Thu Sep 25 17:12:01 PDT 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifdef _WIN32
#ifndef unlink
#include <io.h>
#endif
#else
#include <unistd.h>
#endif // _WIN32
#ifdef _MSC_VER
#pragma warning(disable:4244) // Conversion warnings
#endif
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#include "allheaders.h"
#include "blobbox.h"
#include "blread.h"
#include "colfind.h"
#include "equationdetect.h"
#include "imagefind.h"
#include "linefind.h"
#include "makerow.h"
#include "osdetect.h"
#include "tabvector.h"
#include "tesseractclass.h"
#include "tessvars.h"
#include "textord.h"
#include "tordmain.h"
#include "wordseg.h"
namespace tesseract {
// Max erosions to perform in removing an enclosing circle.
const int kMaxCircleErosions = 8;
// Helper to remove an enclosing circle from an image.
// If there isn't one, then the image will most likely get badly mangled.
// The returned pix must be pixDestroyed after use. NULL may be returned
// if the image doesn't meet the trivial conditions that it uses to determine
// success.
static Pix* RemoveEnclosingCircle(Pix* pixs) {
Pix* pixsi = pixInvert(NULL, pixs);
Pix* pixc = pixCreateTemplate(pixs);
pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET);
pixSeedfillBinary(pixc, pixc, pixsi, 4);
pixInvert(pixc, pixc);
pixDestroy(&pixsi);
Pix* pixt = pixAnd(NULL, pixs, pixc);
l_int32 max_count;
pixCountConnComp(pixt, 8, &max_count);
// The count has to go up before we start looking for the minimum.
l_int32 min_count = MAX_INT32;
Pix* pixout = NULL;
for (int i = 1; i < kMaxCircleErosions; i++) {
pixDestroy(&pixt);
pixErodeBrick(pixc, pixc, 3, 3);
pixt = pixAnd(NULL, pixs, pixc);
l_int32 count;
pixCountConnComp(pixt, 8, &count);
if (i == 1 || count > max_count) {
max_count = count;
min_count = count;
}
else if (i > 1 && count < min_count) {
min_count = count;
pixDestroy(&pixout);
pixout = pixCopy(NULL, pixt); // Save the best.
}
else if (count >= min_count) {
break; // We have passed by the best.
}
}
pixDestroy(&pixt);
pixDestroy(&pixc);
return pixout;
}
/**
* Segment the page according to the current value of tessedit_pageseg_mode.
* pix_binary_ is used as the source image and should not be NULL.
* On return the blocks list owns all the constructed page layout.
*/
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
Tesseract* osd_tess, OSResults* osr) {
ASSERT_HOST(pix_binary_ != NULL);
int width = pixGetWidth(pix_binary_);
int height = pixGetHeight(pix_binary_);
// Get page segmentation mode.
PageSegMode pageseg_mode = static_cast<PageSegMode>(
static_cast<int>(tessedit_pageseg_mode));
// If a UNLV zone file can be found, use that instead of segmentation.
if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&
input_file != NULL && input_file->length() > 0) {
STRING name = *input_file;
const char* lastdot = strrchr(name.string(), '.');
if (lastdot != NULL)
name[lastdot - name.string()] = '\0';
read_unlv_file(name, width, height, blocks);
}
if (blocks->empty()) {
// No UNLV file present. Work according to the PageSegMode.
// First make a single block covering the whole image.
BLOCK_IT block_it(blocks);
BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
block->set_right_to_left(right_to_left());
block_it.add_to_end(block);
}
else {
// UNLV file present. Use PSM_SINGLE_BLOCK.
pageseg_mode = PSM_SINGLE_BLOCK;
}
// The diacritic_blobs list holds noise blobs that may be diacritics. They
// are separated out on areas of the image that seem noisy and short-circuit
// the layout process, going straight from the initial partition creation
// right through to after word segmentation, where they are added to the
// rej_cblobs list of the most appropriate word. From there classification
// will determine whether they are used.
BLOBNBOX_LIST diacritic_blobs;
int auto_page_seg_ret_val = 0;
TO_BLOCK_LIST to_blocks;
if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||
PSM_SPARSE(pageseg_mode)) {
auto_page_seg_ret_val = AutoPageSeg(
pageseg_mode, blocks, &to_blocks,
enable_noise_removal ? &diacritic_blobs : NULL, osd_tess, osr);
if (pageseg_mode == PSM_OSD_ONLY)
return auto_page_seg_ret_val;
// To create blobs from the image region bounds uncomment this line:
// to_blocks.clear(); // Uncomment to go back to the old mode.
}
else {
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
if (pageseg_mode == PSM_CIRCLE_WORD) {
Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
if (pixcleaned != NULL) {
pixDestroy(&pix_binary_);
pix_binary_ = pixcleaned;
}
}
}
if (auto_page_seg_ret_val < 0) {
return -1;
}
if (blocks->empty()) {
if (textord_debug_tabfind)
tprintf("Empty page\n");
return 0; // AutoPageSeg found an empty page.
}
bool splitting =
pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
bool cjk_mode = textord_use_cjk_fp_model;
textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
pix_thresholds_, pix_grey_, splitting || cjk_mode,
&diacritic_blobs, blocks, &to_blocks);
return auto_page_seg_ret_val;
}
// Helper writes a grey image to a file for use by scrollviewer.
// Normally for speed we don't display the image in the layout debug windows.
// If textord_debug_images is true, we draw the image as a background to some
// of the debug windows. printable determines whether these
// images are optimized for printing instead of screen display.
static void WriteDebugBackgroundImage(bool printable, Pix* pix_binary) {
Pix* grey_pix = pixCreate(pixGetWidth(pix_binary),
pixGetHeight(pix_binary), 8);
// Printable images are light grey on white, but for screen display
// they are black on dark grey so the other colors show up well.
if (printable) {
pixSetAll(grey_pix);
pixSetMasked(grey_pix, pix_binary, 192);
}
else {
pixSetAllArbitrary(grey_pix, 64);
pixSetMasked(grey_pix, pix_binary, 0);
}
AlignedBlob::IncrementDebugPix();
pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG);
pixDestroy(&grey_pix);
}
/**
* Auto page segmentation. Divide the page image into blocks of uniform
* text linespacing and images.
*
* Resolution (in ppi) is derived from the input image.
*
* The output goes in the blocks list with corresponding TO_BLOCKs in the
* to_blocks list.
*
* If !PSM_COL_FIND_ENABLED(pageseg_mode), then no attempt is made to divide
* the image into columns, but multiple blocks are still made if the text is
* of non-uniform linespacing.
*
* If diacritic_blobs is non-null, then diacritics/noise blobs, that would
 * confuse layout analysis by causing textline overlap, are placed there,
* with the expectation that they will be reassigned to words later and
* noise/diacriticness determined via classification.
*
 * If osd (orientation and script detection) is enabled then it is performed
 * as well. If only_osd is true, then only orientation and script detection is
 * performed. If osd is desired (osd or only_osd), then osd_tess must be
 * another Tesseract that was initialized especially for osd, and the results
 * will be output into osr (orientation and script result).
*/
int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
TO_BLOCK_LIST* to_blocks,
BLOBNBOX_LIST* diacritic_blobs, Tesseract* osd_tess,
OSResults* osr) {
if (textord_debug_images) {
WriteDebugBackgroundImage(textord_debug_printable, pix_binary_);
}
Pix* photomask_pix = NULL;
Pix* musicmask_pix = NULL;
// The blocks made by the ColumnFinder. Moved to blocks before return.
BLOCK_LIST found_blocks;
TO_BLOCK_LIST temp_blocks;
ColumnFinder* finder = SetupPageSegAndDetectOrientation(
pageseg_mode, blocks, osd_tess, osr, &temp_blocks, &photomask_pix,
&musicmask_pix);
int result = 0;
if (finder != NULL) {
TO_BLOCK_IT to_block_it(&temp_blocks);
TO_BLOCK* to_block = to_block_it.data();
if (musicmask_pix != NULL) {
// TODO(rays) pass the musicmask_pix into FindBlocks and mark music
// blocks separately. For now combine with photomask_pix.
pixOr(photomask_pix, photomask_pix, musicmask_pix);
}
if (equ_detect_) {
finder->SetEquationDetect(equ_detect_);
}
 /* Skew angle detection disabled (block-finding call commented out):
result = finder->FindBlocks(
pageseg_mode, scaled_color_, scaled_factor_, to_block, photomask_pix,
pix_thresholds_, pix_grey_, &found_blocks, diacritic_blobs, to_blocks);
if (result >= 0)
finder->GetDeskewVectors(&deskew_, &reskew_);
*/
delete finder;
}
pixDestroy(&photomask_pix);
pixDestroy(&musicmask_pix);
if (result < 0) return result;
blocks->clear();
BLOCK_IT block_it(blocks);
// Move the found blocks to the input/output blocks.
block_it.add_list_after(&found_blocks);
if (textord_debug_images) {
// The debug image is no longer needed so delete it.
unlink(AlignedBlob::textord_debug_pix().string());
}
return result;
}
// Helper adds all the scripts from sid_set converted to ids from osd_set to
// allowed_ids.
static void AddAllScriptsConverted(const UNICHARSET& sid_set,
const UNICHARSET& osd_set,
GenericVector<int>* allowed_ids) {
for (int i = 0; i < sid_set.get_script_table_size(); ++i) {
if (i != sid_set.null_sid()) {
const char* script = sid_set.get_script_from_script_id(i);
allowed_ids->push_back(osd_set.get_script_id_from_name(script));
}
}
}
/**
* Sets up auto page segmentation, determines the orientation, and corrects it.
* Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to
* facilitate testing.
* photo_mask_pix is a pointer to a NULL pointer that will be filled on return
* with the leptonica photo mask, which must be pixDestroyed by the caller.
* to_blocks is an empty list that will be filled with (usually a single)
* block that is used during layout analysis. This ugly API is required
 * because of the possibility of a UNLV zone file.
* TODO(rays) clean this up.
* See AutoPageSeg for other arguments.
* The returned ColumnFinder must be deleted after use.
*/
ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
Pix** music_mask_pix) {
int vertical_x = 0;
int vertical_y = 1;
TabVector_LIST v_lines;
TabVector_LIST h_lines;
ICOORD bleft(0, 0);
ASSERT_HOST(pix_binary_ != NULL);
if (tessedit_dump_pageseg_images) {
pixWrite("tessinput.png", pix_binary_, IFF_PNG);
}
// Leptonica is used to find the rule/separator lines in the input.
LineFinder::FindAndRemoveLines(source_resolution_,
textord_tabfind_show_vlines, pix_binary_,
&vertical_x, &vertical_y, music_mask_pix,
&v_lines, &h_lines);
if (tessedit_dump_pageseg_images)
pixWrite("tessnolines.png", pix_binary_, IFF_PNG);
// Leptonica is used to find a mask of the photo regions in the input.
*photo_mask_pix = ImageFind::FindImages(pix_binary_);
if (tessedit_dump_pageseg_images)
pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();
// The rest of the algorithm uses the usual connected components.
textord_.find_components(pix_binary_, blocks, to_blocks);
TO_BLOCK_IT to_block_it(to_blocks);
// There must be exactly one input block.
// TODO(rays) handle new textline finding with a UNLV zone file.
ASSERT_HOST(to_blocks->singleton());
TO_BLOCK* to_block = to_block_it.data();
TBOX blkbox = to_block->block->bounding_box();
ColumnFinder* finder = NULL;
if (to_block->line_size >= 2) {
finder = new ColumnFinder(static_cast<int>(to_block->line_size),
blkbox.botleft(), blkbox.topright(),
source_resolution_, textord_use_cjk_fp_model,
textord_tabfind_aligned_gap_fraction,
&v_lines, &h_lines, vertical_x, vertical_y);
finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);
if (equ_detect_) {
equ_detect_->LabelSpecialText(to_block);
}
BLOBNBOX_CLIST osd_blobs;
// osd_orientation is the number of 90 degree rotations to make the
// characters upright. (See osdetect.h for precise definition.)
 // We want the text lines horizontal (vertical text indicates vertical
 // textlines), which may conflict (e.g. vertically written CJK).
int osd_orientation = 0;
bool vertical_text = textord_tabfind_force_vertical_text ||
pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
if (!vertical_text && textord_tabfind_vertical_text &&
PSM_ORIENTATION_ENABLED(pageseg_mode)) {
vertical_text =
finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio,
to_block, &osd_blobs);
}
if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != NULL && osr != NULL) {
GenericVector<int> osd_scripts;
if (osd_tess != this) {
// We are running osd as part of layout analysis, so constrain the
// scripts to those allowed by *this.
AddAllScriptsConverted(unicharset, osd_tess->unicharset, &osd_scripts);
for (int s = 0; s < sub_langs_.size(); ++s) {
AddAllScriptsConverted(sub_langs_[s]->unicharset,
osd_tess->unicharset, &osd_scripts);
}
}
os_detect_blobs(&osd_scripts, &osd_blobs, osr, osd_tess);
if (pageseg_mode == PSM_OSD_ONLY) {
delete finder;
return NULL;
}
osd_orientation = osr->best_result.orientation_id;
double osd_score = osr->orientations[osd_orientation];
double osd_margin = min_orientation_margin * 2;
for (int i = 0; i < 4; ++i) {
if (i != osd_orientation &&
osd_score - osr->orientations[i] < osd_margin) {
osd_margin = osd_score - osr->orientations[i];
}
}
int best_script_id = osr->best_result.script_id;
const char* best_script_str =
osd_tess->unicharset.get_script_from_script_id(best_script_id);
bool cjk = best_script_id == osd_tess->unicharset.han_sid() ||
best_script_id == osd_tess->unicharset.hiragana_sid() ||
best_script_id == osd_tess->unicharset.katakana_sid() ||
strcmp("Japanese", best_script_str) == 0 ||
strcmp("Korean", best_script_str) == 0 ||
strcmp("Hangul", best_script_str) == 0;
if (cjk) {
finder->set_cjk_script(true);
}
if (osd_margin < min_orientation_margin) {
// The margin is weak.
if (!cjk && !vertical_text && osd_orientation == 2) {
// upside down latin text is improbable with such a weak margin.
tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: "
"Don't rotate.\n", osd_margin);
osd_orientation = 0;
}
else {
tprintf(
"OSD: Weak margin (%.2f) for %d blob text block, "
"but using orientation anyway: %d\n",
osd_margin, osd_blobs.length(), osd_orientation);
}
}
}
osd_blobs.shallow_clear();
finder->CorrectOrientation(to_block, vertical_text, osd_orientation);
}
return finder;
}
} // namespace tesseract.

View File

@ -0,0 +1,43 @@
/**********************************************************************
* File: pagewalk.cpp (Formerly walkers.c)
* Description: Block list processors
* Author: Phil Cheatle
* Created: Thu Oct 10 16:25:24 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "pageres.h"
#include "tesseractclass.h"
namespace tesseract {
/**
* @name process_selected_words()
*
* Walk the current block list applying the specified word processor function
* to each word that overlaps the selection_box.
*/
void Tesseract::process_selected_words(
PAGE_RES* page_res, // blocks to check
TBOX & selection_box,
BOOL8(tesseract::Tesseract::*word_processor)(PAGE_RES_IT* pr_it)) {
for (PAGE_RES_IT page_res_it(page_res); page_res_it.word() != NULL;
page_res_it.forward()) {
WERD* word = page_res_it.word()->word;
if (word->bounding_box().overlap(selection_box)) {
if (!(this->*word_processor)(&page_res_it))
return;
}
}
}
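// Illustrative sketch (not in the original source) of calling the walker with
// a pointer-to-member word processor; "dump_word" is a hypothetical member
// with signature BOOL8 Tesseract::dump_word(PAGE_RES_IT* pr_it):
//
//   TBOX selection(ICOORD(0, 0), ICOORD(image_width, image_height));
//   tess->process_selected_words(page_res, selection,
//                                &tesseract::Tesseract::dump_word);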
} // namespace tesseract

View File

@ -0,0 +1,69 @@
///////////////////////////////////////////////////////////////////////
// File: par_control.cpp
// Description: Control code for parallel implementation.
// Author: Ray Smith
// Created: Mon Nov 04 13:23:15 PST 2013
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "tesseractclass.h"
namespace tesseract {
struct BlobData {
BlobData() : blob(NULL), choices(NULL) {}
BlobData(int index, Tesseract* tess, const WERD_RES& word)
: blob(word.chopped_word->blobs[index]),
tesseract(tess),
choices(&(*word.ratings)(index, index)) {}
TBLOB* blob;
Tesseract* tesseract;
BLOB_CHOICE_LIST** choices;
};
void Tesseract::PrerecAllWordsPar(const GenericVector<WordData>& words) {
// Prepare all the blobs.
GenericVector<BlobData> blobs;
for (int w = 0; w < words.size(); ++w) {
if (words[w].word->ratings != NULL &&
words[w].word->ratings->get(0, 0) == NULL) {
for (int s = 0; s < words[w].lang_words.size(); ++s) {
Tesseract* sub = s < sub_langs_.size() ? sub_langs_[s] : this;
const WERD_RES& word = *words[w].lang_words[s];
for (int b = 0; b < word.chopped_word->NumBlobs(); ++b) {
blobs.push_back(BlobData(b, sub, word));
}
}
}
}
// Pre-classify all the blobs.
if (tessedit_parallelize > 1) {
for (int b = 0; b < blobs.size(); ++b) {
*blobs[b].choices =
blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL);
}
}
else {
// TODO(AMD) parallelize this.
for (int b = 0; b < blobs.size(); ++b) {
*blobs[b].choices =
blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL);
}
}
}
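// Both branches of the tessedit_parallelize check above execute the same
// serial loop; the parameter is intended to enable parallel pre-classification
// when raised above 1. A caller could request that, for example, through the
// public API (sketch, assuming a TessBaseAPI instance named "api"):
//
//   api.SetVariable("tessedit_parallelize", "2");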
} // namespace tesseract.

File diff suppressed because it is too large

View File

@ -0,0 +1,108 @@
/**********************************************************************
* File: paragraphs.h
* Description: Paragraph Detection data structures.
* Author: David Eger
* Created: 25 February 2011
*
* (C) Copyright 2011, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSERACT_CCMAIN_PARAGRAPHS_H_
#define TESSERACT_CCMAIN_PARAGRAPHS_H_
#include "rect.h"
#include "ocrpara.h"
#include "genericvector.h"
#include "strngs.h"
class WERD;
class UNICHARSET;
namespace tesseract {
class MutableIterator;
// This structure captures all information needed about a text line for the
// purposes of paragraph detection. It is meant to be exceedingly light-weight
// so that we can easily test paragraph detection independent of the rest of
// Tesseract.
class RowInfo {
public:
// Constant data derived from Tesseract output.
STRING text; // the full UTF-8 text of the line.
bool ltr; // whether the majority of the text is left-to-right
// TODO(eger) make this more fine-grained.
bool has_leaders; // does the line contain leader dots (.....)?
bool has_drop_cap; // does the line have a drop cap?
int pix_ldistance; // distance to the left pblock boundary in pixels
int pix_rdistance; // distance to the right pblock boundary in pixels
float pix_xheight; // guessed xheight for the line
int average_interword_space; // average space between words in pixels.
int num_words;
TBOX lword_box; // in normalized (horiz text rows) space
TBOX rword_box; // in normalized (horiz text rows) space
STRING lword_text; // the UTF-8 text of the leftmost werd
STRING rword_text; // the UTF-8 text of the rightmost werd
// The text of a paragraph typically starts with the start of an idea and
// ends with the end of an idea. Here we define paragraph as something that
// may have a first line indent and a body indent which may be different.
// Typical words that start an idea are:
// 1. Words in western scripts that start with
// a capital letter, for example "The"
// 2. Bulleted or numbered list items, for
// example "2."
// Typical words which end an idea are words ending in punctuation marks. In
// this vocabulary, each list item is represented as a paragraph.
bool lword_indicates_list_item;
bool lword_likely_starts_idea;
bool lword_likely_ends_idea;
bool rword_indicates_list_item;
bool rword_likely_starts_idea;
bool rword_likely_ends_idea;
};
// Main entry point for Paragraph Detection Algorithm.
//
// Given a set of equally spaced textlines (described by row_infos),
// Split them into paragraphs. See http://goto/paragraphstalk
//
// Output:
// row_owners - one pointer for each row, to the paragraph it belongs to.
// paragraphs - this is the actual list of PARA objects.
// models - the list of paragraph models referenced by the PARA objects.
// caller is responsible for deleting the models.
void DetectParagraphs(int debug_level,
GenericVector<RowInfo> *row_infos,
GenericVector<PARA *> *row_owners,
PARA_LIST *paragraphs,
GenericVector<ParagraphModel *> *models);
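// Illustrative call sketch (not part of the original header); row_infos would
// normally be populated from Tesseract's layout output, and the caller owns
// and must delete the returned models:
//
//   GenericVector<RowInfo> row_infos;        // filled in by the caller
//   GenericVector<PARA *> row_owners;        // one owner pointer per row
//   PARA_LIST paragraphs;
//   GenericVector<ParagraphModel *> models;
//   DetectParagraphs(0, &row_infos, &row_owners, &paragraphs, &models);
//   for (int i = 0; i < models.size(); ++i) delete models[i];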
// Given a MutableIterator to the start of a block, run DetectParagraphs on
// that block and commit the results to the underlying ROW and BLOCK structs,
// saving the ParagraphModels in models. Caller owns the models.
// We use unicharset during the function to answer questions such as "is the
// first letter of this word upper case?"
void DetectParagraphs(int debug_level,
bool after_text_recognition,
const MutableIterator *block_start,
GenericVector<ParagraphModel *> *models);
} // namespace
#endif // TESSERACT_CCMAIN_PARAGRAPHS_H_

View File

@ -0,0 +1,312 @@
/**********************************************************************
 * File: paragraphs_internal.h
* Description: Paragraph Detection internal data structures.
* Author: David Eger
* Created: 11 March 2011
*
* (C) Copyright 2011, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSERACT_CCMAIN_PARAGRAPHS_INTERNAL_H_
#define TESSERACT_CCMAIN_PARAGRAPHS_INTERNAL_H_
#include "paragraphs.h"
#ifdef _MSC_VER
#include <string>
#else
#include "strings.h"
#endif
// NO CODE OUTSIDE OF paragraphs.cpp AND TESTS SHOULD NEED TO ACCESS
// DATA STRUCTURES OR FUNCTIONS IN THIS FILE.
class WERD_CHOICE;
namespace tesseract {
// Return whether the given word is likely to be a list item start word.
bool AsciiLikelyListItem(const STRING &word);
// Return the first Unicode Codepoint from werd[pos].
int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos);
// Set right word attributes given either a unicharset and werd or a utf8
// string.
void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd,
const STRING &utf8,
bool *is_list, bool *starts_idea, bool *ends_idea);
// Set left word attributes given either a unicharset and werd or a utf8 string.
void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd,
const STRING &utf8,
bool *is_list, bool *starts_idea, bool *ends_idea);
enum LineType {
LT_START = 'S', // First line of a paragraph.
LT_BODY = 'C', // Continuation line of a paragraph.
LT_UNKNOWN = 'U', // No clues.
LT_MULTIPLE = 'M', // Matches for both LT_START and LT_BODY.
};
// The first paragraph in a page of body text is often un-indented.
// This is a typographic convention which is common to indicate either that:
// (1) The paragraph is the continuation of a previous paragraph, or
// (2) The paragraph is the first paragraph in a chapter.
//
// I refer to such paragraphs as "crown"s, and the output of the paragraph
// detection algorithm attempts to give them the same paragraph model as
// the rest of the body text.
//
// Nonetheless, while building hypotheses, it is useful to mark the lines
// of crown paragraphs temporarily as crowns, either aligned left or right.
extern const ParagraphModel *kCrownLeft;
extern const ParagraphModel *kCrownRight;
inline bool StrongModel(const ParagraphModel *model) {
return model != NULL && model != kCrownLeft && model != kCrownRight;
}
struct LineHypothesis {
LineHypothesis() : ty(LT_UNKNOWN), model(NULL) {}
LineHypothesis(LineType line_type, const ParagraphModel *m)
: ty(line_type), model(m) {}
LineHypothesis(const LineHypothesis &other)
: ty(other.ty), model(other.model) {}
bool operator==(const LineHypothesis &other) const {
return ty == other.ty && model == other.model;
}
LineType ty;
const ParagraphModel *model;
};
class ParagraphTheory; // Forward Declaration
typedef GenericVectorEqEq<const ParagraphModel *> SetOfModels;
// Row Scratch Registers are data generated by the paragraph detection
// algorithm based on a RowInfo input.
class RowScratchRegisters {
public:
// We presume row will outlive us.
void Init(const RowInfo &row);
LineType GetLineType() const;
LineType GetLineType(const ParagraphModel *model) const;
// Mark this as a start line type, sans model. This is useful for the
// initial marking of probable body lines or paragraph start lines.
void SetStartLine();
// Mark this as a body line type, sans model. This is useful for the
 // initial marking of probable body lines or paragraph start lines.
void SetBodyLine();
 // Record that this row fits as a paragraph start line in the given model.
 void AddStartLine(const ParagraphModel *model);
 // Record that this row fits as a paragraph body line in the given model.
 void AddBodyLine(const ParagraphModel *model);
// Clear all hypotheses about this line.
void SetUnknown() { hypotheses_.truncate(0); }
// Append all hypotheses of strong models that match this row as a start.
void StartHypotheses(SetOfModels *models) const;
// Append all hypotheses of strong models matching this row.
void StrongHypotheses(SetOfModels *models) const;
// Append all hypotheses for this row.
void NonNullHypotheses(SetOfModels *models) const;
// Discard any hypotheses whose model is not in the given list.
void DiscardNonMatchingHypotheses(const SetOfModels &models);
// If we have only one hypothesis and that is that this line is a paragraph
// start line of a certain model, return that model. Else return NULL.
const ParagraphModel *UniqueStartHypothesis() const;
// If we have only one hypothesis and that is that this line is a paragraph
// body line of a certain model, return that model. Else return NULL.
const ParagraphModel *UniqueBodyHypothesis() const;
// Return the indentation for the side opposite of the aligned side.
int OffsideIndent(tesseract::ParagraphJustification just) const {
switch (just) {
case tesseract::JUSTIFICATION_RIGHT: return lindent_;
case tesseract::JUSTIFICATION_LEFT: return rindent_;
default: return lindent_ > rindent_ ? lindent_ : rindent_;
}
}
// Return the indentation for the side the text is aligned to.
int AlignsideIndent(tesseract::ParagraphJustification just) const {
switch (just) {
case tesseract::JUSTIFICATION_RIGHT: return rindent_;
case tesseract::JUSTIFICATION_LEFT: return lindent_;
default: return lindent_ > rindent_ ? lindent_ : rindent_;
}
}
// Append header fields to a vector of row headings.
static void AppendDebugHeaderFields(GenericVector<STRING> *header);
// Append data for this row to a vector of debug strings.
void AppendDebugInfo(const ParagraphTheory &theory,
GenericVector<STRING> *dbg) const;
const RowInfo *ri_;
// These four constants form a horizontal box model for the white space
// on the edges of each line. At each point in the algorithm, the following
// shall hold:
// ri_->pix_ldistance = lmargin_ + lindent_
// ri_->pix_rdistance = rindent_ + rmargin_
int lmargin_;
int lindent_;
int rindent_;
int rmargin_;
private:
// Hypotheses of either LT_START or LT_BODY
GenericVectorEqEq<LineHypothesis> hypotheses_;
};
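// Worked example of the box model above (the numbers are illustrative only):
// if a row's left edge sits 50 px inside the block (ri_->pix_ldistance == 50)
// and the estimated common left margin for this run of rows is 30 px, then
// lmargin_ == 30 and lindent_ == 20, preserving
// pix_ldistance == lmargin_ + lindent_. The right-hand side decomposes the
// same way into rindent_ and rmargin_.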
// A collection of convenience functions for wrapping the set of
// Paragraph Models we believe correctly model the paragraphs in the image.
class ParagraphTheory {
public:
// We presume models will outlive us, and that models will take ownership
// of any ParagraphModel *'s we add.
explicit ParagraphTheory(GenericVector<ParagraphModel *> *models)
: models_(models) {}
GenericVector<ParagraphModel *> &models() { return *models_; }
const GenericVector<ParagraphModel *> &models() const { return *models_; }
// Return an existing model if one that is Comparable() can be found.
// Else, allocate a new copy of model to save and return a pointer to it.
const ParagraphModel *AddModel(const ParagraphModel &model);
// Discard any models we've made that are not in the list of used models.
void DiscardUnusedModels(const SetOfModels &used_models);
// Return the set of all non-centered models.
void NonCenteredModels(SetOfModels *models);
// If any of the non-centered paragraph models we know about fit
// rows[start, end), return it. Else NULL.
const ParagraphModel *Fits(const GenericVector<RowScratchRegisters> *rows,
int start, int end) const;
int IndexOf(const ParagraphModel *model) const;
private:
GenericVector<ParagraphModel *> *models_;
GenericVectorEqEq<ParagraphModel *> models_we_added_;
};
bool ValidFirstLine(const GenericVector<RowScratchRegisters> *rows,
int row, const ParagraphModel *model);
bool ValidBodyLine(const GenericVector<RowScratchRegisters> *rows,
int row, const ParagraphModel *model);
bool CrownCompatible(const GenericVector<RowScratchRegisters> *rows,
int a, int b, const ParagraphModel *model);
// A class for smearing Paragraph Model hypotheses to surrounding rows.
// The idea here is that StrongEvidenceClassify first marks only exceedingly
// obvious start and body rows and constructs models of them. Thereafter,
// we may have left over unmarked lines (mostly end-of-paragraph lines) which
// were too short to have much confidence about, but which fit the models we've
// constructed perfectly and which we ought to mark. This class is used to
// "smear" our models over the text.
class ParagraphModelSmearer {
public:
ParagraphModelSmearer(GenericVector<RowScratchRegisters> *rows,
int row_start, int row_end,
ParagraphTheory *theory);
// Smear forward paragraph models from existing row markings to subsequent
// text lines if they fit, and mark any thereafter still unmodeled rows
// with any model in the theory that fits them.
void Smear();
private:
// Record in open_models_ for rows [start_row, end_row) the list of models
// currently open at each row.
// A model is still open in a row if some previous row has said model as a
// start hypothesis, and all rows since (including this row) would fit as
// either a body or start line in that model.
void CalculateOpenModels(int row_start, int row_end);
SetOfModels &OpenModels(int row) {
return open_models_[row - row_start_ + 1];
}
ParagraphTheory *theory_;
GenericVector<RowScratchRegisters> *rows_;
int row_start_;
int row_end_;
// open_models_ corresponds to rows[start_row_ - 1, end_row_]
//
// open_models_: Contains models which there was an active (open) paragraph
// as of the previous line and for which the left and right
// indents admit the possibility that this text line continues
// to fit the same model.
// TODO(eger): Think about whether we can get rid of "Open" models and just
// use the current hypotheses on RowScratchRegisters.
GenericVector<SetOfModels> open_models_;
};
// Clear all hypotheses about lines [start, end) and reset the margins to the
// percentile (0..100) value of the left and right row edges for this run of
// rows.
void RecomputeMarginsAndClearHypotheses(
GenericVector<RowScratchRegisters> *rows, int start, int end,
int percentile);
// Return the median inter-word space in rows[row_start, row_end).
int InterwordSpace(const GenericVector<RowScratchRegisters> &rows,
int row_start, int row_end);
// Return whether the first word on the after line can fit in the space at
// the end of the before line (knowing which way the text is aligned and read).
bool FirstWordWouldHaveFit(const RowScratchRegisters &before,
const RowScratchRegisters &after,
tesseract::ParagraphJustification justification);
// Return whether the first word on the after line can fit in the space at
// the end of the before line (not knowing the text alignment).
bool FirstWordWouldHaveFit(const RowScratchRegisters &before,
const RowScratchRegisters &after);
// Do rows[start, end) form a single instance of the given paragraph model?
bool RowsFitModel(const GenericVector<RowScratchRegisters> *rows,
int start, int end, const ParagraphModel *model);
// Do the text and geometry of two rows support a paragraph break between them?
bool LikelyParagraphStart(const RowScratchRegisters &before,
const RowScratchRegisters &after,
tesseract::ParagraphJustification j);
// Given a set of row_owners pointing to PARAs or NULL (no paragraph known),
// normalize each row_owner to point to an actual PARA, and output the
// paragraphs in order onto paragraphs.
void CanonicalizeDetectionResults(
GenericVector<PARA *> *row_owners,
PARA_LIST *paragraphs);
} // namespace
#endif // TESSERACT_CCMAIN_PARAGRAPHS_INTERNAL_H_

View File

@ -0,0 +1,370 @@
///////////////////////////////////////////////////////////////////////
// File: paramsd.cpp
// Description: Tesseract parameter Editor
// Author: Joern Wanke
// Created: Wed Jul 18 10:05:01 PDT 2007
//
// (C) Copyright 2007, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
//
// The parameters editor is used to edit all the parameters used within
// tesseract from the ui.
#ifdef _WIN32
#else
#include <stdlib.h>
#include <stdio.h>
#endif
#include <map>
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#ifndef GRAPHICS_DISABLED
#include "paramsd.h"
#include "params.h"
#include "scrollview.h"
#include "svmnode.h"
#define VARDIR "configs/" /*parameters files */
#define MAX_ITEMS_IN_SUBMENU 30
// The following variables should remain static globals, since they
// are used by debug editor, which uses a single Tesseract instance.
//
// Contains the mappings from unique VC ids to their actual pointers.
static std::map<int, ParamContent*> vcMap;
static int nrParams = 0;
static int writeCommands[2];
ELISTIZE(ParamContent)
// Constructors for the various ParamTypes.
ParamContent::ParamContent(tesseract::StringParam* it) {
my_id_ = nrParams;
nrParams++;
param_type_ = VT_STRING;
sIt = it;
vcMap[my_id_] = this;
}
// Constructors for the various ParamTypes.
ParamContent::ParamContent(tesseract::IntParam* it) {
my_id_ = nrParams;
nrParams++;
param_type_ = VT_INTEGER;
iIt = it;
vcMap[my_id_] = this;
}
// Constructors for the various ParamTypes.
ParamContent::ParamContent(tesseract::BoolParam* it) {
my_id_ = nrParams;
nrParams++;
param_type_ = VT_BOOLEAN;
bIt = it;
vcMap[my_id_] = this;
}
// Constructors for the various ParamTypes.
ParamContent::ParamContent(tesseract::DoubleParam* it) {
my_id_ = nrParams;
nrParams++;
param_type_ = VT_DOUBLE;
dIt = it;
vcMap[my_id_] = this;
}
// Gets a VC object identified by its ID.
ParamContent* ParamContent::GetParamContentById(int id) {
return vcMap[id];
}
// Copy the first N words from the source string to the target string.
// Words are delimited by "_".
void ParamsEditor::GetFirstWords(
const char *s, // source string
int n, // number of words
char *t // target string
) {
int full_length = strlen(s);
 int reqd_len = 0; // No. of chars required
 const char *next_word = s;
 while ((n > 0) && reqd_len < full_length) {
 reqd_len += strcspn(next_word, "_") + 1;
 next_word = s + reqd_len; // advance to the start of the next word
 n--;
 }
 strncpy(t, s, reqd_len);
 t[reqd_len] = '\0'; // ensure null termination
}
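// Example (mirroring the documentation in paramsd.h): GetFirstWords with
// s == "tesseract_foo_bar" and n == 2 leaves "tesseract_foo_" in t.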
// Getter for the name.
const char* ParamContent::GetName() const {
if (param_type_ == VT_INTEGER) { return iIt->name_str(); }
else if (param_type_ == VT_BOOLEAN) { return bIt->name_str(); }
else if (param_type_ == VT_DOUBLE) { return dIt->name_str(); }
else if (param_type_ == VT_STRING) { return sIt->name_str(); }
else
return "ERROR: ParamContent::GetName()";
}
// Getter for the description.
const char* ParamContent::GetDescription() const {
if (param_type_ == VT_INTEGER) { return iIt->info_str(); }
else if (param_type_ == VT_BOOLEAN) { return bIt->info_str(); }
else if (param_type_ == VT_DOUBLE) { return dIt->info_str(); }
else if (param_type_ == VT_STRING) { return sIt->info_str(); }
else return NULL;
}
// Getter for the value.
STRING ParamContent::GetValue() const {
STRING result;
if (param_type_ == VT_INTEGER) {
result.add_str_int("", *iIt);
}
else if (param_type_ == VT_BOOLEAN) {
result.add_str_int("", *bIt);
}
else if (param_type_ == VT_DOUBLE) {
result.add_str_double("", *dIt);
}
else if (param_type_ == VT_STRING) {
if (((STRING) * (sIt)).string() != NULL) {
result = sIt->string();
}
else {
result = "Null";
}
}
return result;
}
// Setter for the value.
void ParamContent::SetValue(const char* val) {
// TODO (wanke) Test if the values actually are properly converted.
// (Quickly visible impacts?)
changed_ = TRUE;
if (param_type_ == VT_INTEGER) {
iIt->set_value(atoi(val));
}
else if (param_type_ == VT_BOOLEAN) {
bIt->set_value(atoi(val));
}
else if (param_type_ == VT_DOUBLE) {
dIt->set_value(strtod(val, NULL));
}
else if (param_type_ == VT_STRING) {
sIt->set_value(val);
}
}
// Gets up to the first 3 prefixes from s (split by _).
// For example, tesseract_foo_bar will be split into tesseract,foo and bar.
void ParamsEditor::GetPrefixes(const char* s, STRING* level_one,
STRING* level_two,
STRING* level_three) {
char* p = new char[1024];
GetFirstWords(s, 1, p);
*level_one = p;
GetFirstWords(s, 2, p);
*level_two = p;
GetFirstWords(s, 3, p);
*level_three = p;
delete[] p;
}
// Compare two VC objects by their name.
int ParamContent::Compare(const void* v1, const void* v2) {
const ParamContent* one =
*reinterpret_cast<const ParamContent* const *>(v1);
const ParamContent* two =
*reinterpret_cast<const ParamContent* const *>(v2);
return strcmp(one->GetName(), two->GetName());
}
// Find all editable parameters used within tesseract and create a
// SVMenuNode tree from it.
// TODO (wanke): This is actually sort of hackish.
SVMenuNode* ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) {
SVMenuNode* mr = new SVMenuNode();
ParamContent_LIST vclist;
ParamContent_IT vc_it(&vclist);
// Amount counts the number of entries for a specific char*.
// TODO(rays) get rid of the use of std::map.
std::map<const char*, int> amount;
// Add all parameters to a list.
int v, i;
int num_iterations = (tess->params() == NULL) ? 1 : 2;
for (v = 0; v < num_iterations; ++v) {
tesseract::ParamsVectors *vec = (v == 0) ? GlobalParams() : tess->params();
for (i = 0; i < vec->int_params.size(); ++i) {
vc_it.add_after_then_move(new ParamContent(vec->int_params[i]));
}
for (i = 0; i < vec->bool_params.size(); ++i) {
vc_it.add_after_then_move(new ParamContent(vec->bool_params[i]));
}
for (i = 0; i < vec->string_params.size(); ++i) {
vc_it.add_after_then_move(new ParamContent(vec->string_params[i]));
}
for (i = 0; i < vec->double_params.size(); ++i) {
vc_it.add_after_then_move(new ParamContent(vec->double_params[i]));
}
}
// Count the # of entries starting with a specific prefix.
for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) {
ParamContent* vc = vc_it.data();
STRING tag;
STRING tag2;
STRING tag3;
GetPrefixes(vc->GetName(), &tag, &tag2, &tag3);
amount[tag.string()]++;
amount[tag2.string()]++;
amount[tag3.string()]++;
}
vclist.sort(ParamContent::Compare); // Sort the list alphabetically.
SVMenuNode* other = mr->AddChild("OTHER");
// go through the list again and this time create the menu structure.
vc_it.move_to_first();
for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) {
ParamContent* vc = vc_it.data();
STRING tag;
STRING tag2;
STRING tag3;
GetPrefixes(vc->GetName(), &tag, &tag2, &tag3);
if (amount[tag.string()] == 1) {
other->AddChild(vc->GetName(), vc->GetId(), vc->GetValue().string(),
vc->GetDescription());
}
else { // More than one would use this submenu -> create submenu.
SVMenuNode* sv = mr->AddChild(tag.string());
if ((amount[tag.string()] <= MAX_ITEMS_IN_SUBMENU) ||
(amount[tag2.string()] <= 1)) {
sv->AddChild(vc->GetName(), vc->GetId(),
vc->GetValue().string(), vc->GetDescription());
}
else { // Make subsubmenus.
SVMenuNode* sv2 = sv->AddChild(tag2.string());
sv2->AddChild(vc->GetName(), vc->GetId(),
vc->GetValue().string(), vc->GetDescription());
}
}
}
return mr;
}
// Event listener. Waits for SVET_POPUP events and processes them.
void ParamsEditor::Notify(const SVEvent* sve) {
if (sve->type == SVET_POPUP) { // only catch SVET_POPUP!
char* param = sve->parameter;
if (sve->command_id == writeCommands[0]) {
WriteParams(param, false);
}
else if (sve->command_id == writeCommands[1]) {
WriteParams(param, true);
}
else {
ParamContent* vc = ParamContent::GetParamContentById(
sve->command_id);
vc->SetValue(param);
sv_window_->AddMessage("Setting %s to %s",
vc->GetName(), vc->GetValue().string());
}
}
}
// Integrate the parameters editor as popupmenu into the existing scrollview
// window (usually the pg editor). If sv == NULL, create a new empty
// window and attach the parameters editor to that window (ugly).
ParamsEditor::ParamsEditor(tesseract::Tesseract* tess,
ScrollView* sv) {
if (sv == NULL) {
const char* name = "ParamEditorMAIN";
sv = new ScrollView(name, 1, 1, 200, 200, 300, 200);
}
sv_window_ = sv;
//Only one event handler per window.
//sv->AddEventHandler((SVEventHandler*) this);
SVMenuNode* svMenuRoot = BuildListOfAllLeaves(tess);
STRING paramfile;
paramfile = tess->datadir;
paramfile += VARDIR; // parameters dir
paramfile += "edited"; // actual name
SVMenuNode* std_menu = svMenuRoot->AddChild("Build Config File");
writeCommands[0] = nrParams + 1;
std_menu->AddChild("All Parameters", writeCommands[0],
paramfile.string(), "Config file name?");
writeCommands[1] = nrParams + 2;
std_menu->AddChild("changed_ Parameters Only", writeCommands[1],
paramfile.string(), "Config file name?");
svMenuRoot->BuildMenu(sv, false);
}
// Write all (changed_) parameters to a config file.
void ParamsEditor::WriteParams(char *filename,
bool changes_only) {
FILE *fp; // input file
char msg_str[255];
// if file exists
if ((fp = fopen(filename, "rb")) != NULL) {
fclose(fp);
sprintf(msg_str, "Overwrite file " "%s" "? (Y/N)", filename);
int a = sv_window_->ShowYesNoDialog(msg_str);
if (a == 'n') {
return;
} // don't write
}
fp = fopen(filename, "wb"); // can we write to it?
if (fp == NULL) {
sv_window_->AddMessage(
"Can't write to file "
"%s"
"",
filename);
return;
}
for (std::map<int, ParamContent*>::iterator iter = vcMap.begin();
iter != vcMap.end();
++iter) {
ParamContent* cur = iter->second;
if (!changes_only || cur->HasChanged()) {
fprintf(fp, "%-25s %-12s # %s\n",
cur->GetName(), cur->GetValue().string(), cur->GetDescription());
}
}
fclose(fp);
}
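// For reference, each emitted line follows the "%-25s %-12s # %s" format used
// above; for example (parameter name and value are illustrative only):
//
//   some_int_param            42           # Description of the parameter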
#endif

View File

@ -0,0 +1,126 @@
///////////////////////////////////////////////////////////////////////
// File: paramsd.h
// Description: Tesseract parameter editor
// Author: Joern Wanke
// Created: Wed Jul 18 10:05:01 PDT 2007
//
// (C) Copyright 2007, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
//
// Tesseract parameter editor is used to edit all the parameters used
// within tesseract from the ui.
#ifndef GRAPHICS_DISABLED
#ifndef VARABLED_H
#define VARABLED_H
#include "elst.h"
#ifndef NO_CUBE_BUILD
#include "scrollview.h"
#endif
#include "params.h"
#include "tesseractclass.h"
class SVMenuNode;
// A list of all possible parameter types used.
enum ParamType {
VT_INTEGER,
VT_BOOLEAN,
VT_STRING,
VT_DOUBLE
};
// A rather hackish helper structure which can take any kind of parameter input
// (defined by ParamType) and do a couple of common operations on them, like
// comparison or getting its value. It is used in the context of the
// ParamsEditor as a bridge from the internal tesseract parameters to the
// ones displayed by the ScrollView server.
class ParamContent : public ELIST_LINK {
public:
// Compare two VC objects by their name.
static int Compare(const void* v1, const void* v2);
// Gets a VC object identified by its ID.
static ParamContent* GetParamContentById(int id);
// Constructors for the various ParamTypes.
ParamContent() {
}
explicit ParamContent(tesseract::StringParam* it);
explicit ParamContent(tesseract::IntParam* it);
explicit ParamContent(tesseract::BoolParam* it);
explicit ParamContent(tesseract::DoubleParam* it);
// Getters and Setters.
void SetValue(const char* val);
STRING GetValue() const;
const char* GetName() const;
const char* GetDescription() const;
int GetId() { return my_id_; }
bool HasChanged() { return changed_; }
private:
// The unique ID of this VC object.
int my_id_;
// Whether the parameter was changed_ and thus needs to be rewritten.
bool changed_;
// The actual ParamType of this VC object.
ParamType param_type_;
tesseract::StringParam* sIt;
tesseract::IntParam* iIt;
tesseract::BoolParam* bIt;
tesseract::DoubleParam* dIt;
};
ELISTIZEH(ParamContent)
// The parameters editor enables the user to edit all the parameters used within
// tesseract. It can be invoked on its own, but is supposed to be invoked by
// the program editor.
class ParamsEditor : public SVEventHandler {
public:
// Integrate the parameters editor as popupmenu into the existing scrollview
// window (usually the pg editor). If sv == NULL, create a new empty
// window and attach the parameter editor to that window (ugly).
explicit ParamsEditor(tesseract::Tesseract*, ScrollView* sv = NULL);
// Event listener. Waits for SVET_POPUP events and processes them.
void Notify(const SVEvent* sve);
private:
// Gets up to the first 3 prefixes from s (split by _).
// For example, tesseract_foo_bar will be split into tesseract,foo and bar.
void GetPrefixes(const char* s, STRING* level_one,
STRING* level_two, STRING* level_three);
// Gets the first n words (split by _) and puts them in t.
// For example, tesseract_foo_bar with N=2 will yield tesseract_foo_.
void GetFirstWords(const char *s, // source string
int n, // number of words
char *t); // target string
// Find all editable parameters used within tesseract and create a
// SVMenuNode tree from it.
SVMenuNode *BuildListOfAllLeaves(tesseract::Tesseract *tess);
// Write all (changed_) parameters to a config file.
void WriteParams(char* filename, bool changes_only);
ScrollView* sv_window_;
};
#endif
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,87 @@
///////////////////////////////////////////////////////////////////////
// File: pgedit.h
// Description: Page structure file editor
// Author: Joern Wanke
// Created: Wed Jul 18 10:05:01 PDT 2007
//
// (C) Copyright 2007, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef PGEDIT_H
#define PGEDIT_H
#include "ocrblock.h"
#include "ocrrow.h"
#include "werd.h"
#include "rect.h"
#include "params.h"
#include "tesseractclass.h"
class ScrollView;
class SVMenuNode;
struct SVEvent;
// A small event handler class to process incoming events to
// this window.
class PGEventHandler : public SVEventHandler {
public:
PGEventHandler(tesseract::Tesseract* tess) : tess_(tess) {
}
void Notify(const SVEvent* sve);
private:
tesseract::Tesseract* tess_;
};
extern BLOCK_LIST *current_block_list;
extern STRING_VAR_H(editor_image_win_name, "EditorImage",
"Editor image window name");
extern INT_VAR_H(editor_image_xpos, 590, "Editor image X Pos");
extern INT_VAR_H(editor_image_ypos, 10, "Editor image Y Pos");
extern INT_VAR_H(editor_image_height, 680, "Editor image height");
extern INT_VAR_H(editor_image_width, 655, "Editor image width");
extern INT_VAR_H(editor_image_word_bb_color, BLUE,
"Word bounding box colour");
extern INT_VAR_H(editor_image_blob_bb_color, YELLOW,
"Blob bounding box colour");
extern INT_VAR_H(editor_image_text_color, WHITE, "Correct text colour");
extern STRING_VAR_H(editor_dbwin_name, "EditorDBWin",
"Editor debug window name");
extern INT_VAR_H(editor_dbwin_xpos, 50, "Editor debug window X Pos");
extern INT_VAR_H(editor_dbwin_ypos, 500, "Editor debug window Y Pos");
extern INT_VAR_H(editor_dbwin_height, 24, "Editor debug window height");
extern INT_VAR_H(editor_dbwin_width, 80, "Editor debug window width");
extern STRING_VAR_H(editor_word_name, "BlnWords",
"BL normalised word window");
extern INT_VAR_H(editor_word_xpos, 60, "Word window X Pos");
extern INT_VAR_H(editor_word_ypos, 510, "Word window Y Pos");
extern INT_VAR_H(editor_word_height, 240, "Word window height");
extern INT_VAR_H(editor_word_width, 655, "Word window width");
extern double_VAR_H(editor_smd_scale_factor, 1.0, "Scaling for smd image");
ScrollView* bln_word_window_handle(); //return handle
void build_image_window(int width, int height);
void display_bln_lines(ScrollView window,
ScrollView::Color colour,
float scale_factor,
float y_offset,
float minx,
float maxx);
//function to call
void pgeditor_msg( //message display
const char *msg);
void pgeditor_show_point( //display coords
SVEvent *event);
//put bln word in box
void show_point(PAGE_RES* page_res, float x, float y);
#endif

View File

@ -0,0 +1,233 @@
///////////////////////////////////////////////////////////////////////
// File: recogtraining.cpp
// Description: Functions for ambiguity and parameter training.
// Author: Daria Antonova
// Created: Mon Aug 13 11:26:43 PDT 2009
//
// (C) Copyright 2009, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "tesseractclass.h"
#include "boxread.h"
#include "control.h"
#include "cutil.h"
#include "host.h"
#include "ratngs.h"
#include "reject.h"
#include "stopper.h"
namespace tesseract {
const inT16 kMaxBoxEdgeDiff = 2;
// Sets flags necessary for recognition in the training mode.
// Opens and returns the pointer to the output file.
FILE *Tesseract::init_recog_training(const STRING &fname) {
if (tessedit_ambigs_training) {
tessedit_tess_adaption_mode.set_value(0); // turn off adaption
tessedit_enable_doc_dict.set_value(0); // turn off document dictionary
// Explore all segmentations.
getDict().stopper_no_acceptable_choices.set_value(1);
}
STRING output_fname = fname;
const char *lastdot = strrchr(output_fname.string(), '.');
if (lastdot != NULL) output_fname[lastdot - output_fname.string()] = '\0';
output_fname += ".txt";
FILE *output_file = open_file(output_fname.string(), "a+");
return output_file;
}
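// For example, an fname of "foo.tif" yields an output file named "foo.txt",
// opened for appending ("a+").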
// Copies the bounding box from page_res_it->word() to the given TBOX.
bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox) {
while (page_res_it->block() != NULL && page_res_it->word() == NULL)
page_res_it->forward();
if (page_res_it->word() != NULL) {
*tbox = page_res_it->word()->word->bounding_box();
// If tbox->left() is negative, the training image has vertical text and
// all the coordinates of bounding boxes of page_res are rotated by 90
// degrees in a counterclockwise direction. We need to rotate the TBOX back
// in order to compare with the TBOXes of box files.
if (tbox->left() < 0) {
tbox->rotate(FCOORD(0.0, -1.0));
}
return true;
}
else {
return false;
}
}
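// For reference: each line of the box file consumed below by ReadNextBox()
// describes one labeled bounding box as
//   <symbol> <left> <bottom> <right> <top> <page>
// with pixel coordinates measured from the bottom-left corner of the image,
// e.g. "s 26 32 42 60 0".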
// This function takes a tif/box pair of files and runs recognition on the
// image, while making sure that the word bounds that tesseract identified
// roughly match those specified by the input box file. For each word (ngram in
// a single bounding box from the input box file) it outputs the OCRed result,
// the correct label, rating and certainty.
void Tesseract::recog_training_segmented(const STRING &fname,
PAGE_RES *page_res,
volatile ETEXT_DESC *monitor,
FILE *output_file) {
STRING box_fname = fname;
const char *lastdot = strrchr(box_fname.string(), '.');
if (lastdot != NULL) box_fname[lastdot - box_fname.string()] = '\0';
box_fname += ".box";
// ReadNextBox() will close box_file
FILE *box_file = open_file(box_fname.string(), "r");
PAGE_RES_IT page_res_it;
page_res_it.page_res = page_res;
page_res_it.restart_page();
STRING label;
// Process all the words on this page.
TBOX tbox; // tesseract-identified box
TBOX bbox; // box from the box file
bool keep_going;
int line_number = 0;
int examined_words = 0;
do {
keep_going = read_t(&page_res_it, &tbox);
keep_going &= ReadNextBox(applybox_page, &line_number, box_file, &label,
&bbox);
// Align bottom left points of the TBOXes.
while (keep_going &&
!NearlyEqual<int>(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) {
if (bbox.bottom() < tbox.bottom()) {
page_res_it.forward();
keep_going = read_t(&page_res_it, &tbox);
}
else {
keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label,
&bbox);
}
}
while (keep_going &&
!NearlyEqual<int>(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) {
if (bbox.left() > tbox.left()) {
page_res_it.forward();
keep_going = read_t(&page_res_it, &tbox);
}
else {
keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label,
&bbox);
}
}
// OCR the word if top right points of the TBOXes are similar.
if (keep_going &&
NearlyEqual<int>(tbox.right(), bbox.right(), kMaxBoxEdgeDiff) &&
NearlyEqual<int>(tbox.top(), bbox.top(), kMaxBoxEdgeDiff)) {
ambigs_classify_and_output(label.string(), &page_res_it, output_file);
examined_words++;
}
page_res_it.forward();
} while (keep_going);
// Set up scripts on all of the words that did not get sent to
// ambigs_classify_and_output. They all should have, but if all the
// werd_res's don't get uch_sets, tesseract will crash when you try
// to iterate over them. :-(
int total_words = 0;
for (page_res_it.restart_page(); page_res_it.block() != NULL;
page_res_it.forward()) {
if (page_res_it.word()) {
if (page_res_it.word()->uch_set == NULL)
page_res_it.word()->SetupFake(unicharset);
total_words++;
}
}
if (examined_words < 0.85 * total_words) {
tprintf("TODO(antonova): clean up recog_training_segmented; "
" It examined only a small fraction of the ambigs image.\n");
}
tprintf("recog_training_segmented: examined %d / %d words.\n",
examined_words, total_words);
}
// Helper prints the given set of blob choices.
static void PrintPath(int length, const BLOB_CHOICE** blob_choices,
const UNICHARSET& unicharset,
const char *label, FILE *output_file) {
float rating = 0.0f;
float certainty = 0.0f;
for (int i = 0; i < length; ++i) {
const BLOB_CHOICE* blob_choice = blob_choices[i];
fprintf(output_file, "%s",
unicharset.id_to_unichar(blob_choice->unichar_id()));
rating += blob_choice->rating();
if (certainty > blob_choice->certainty())
certainty = blob_choice->certainty();
}
fprintf(output_file, "\t%s\t%.4f\t%.4f\n",
label, rating, certainty);
}
// Helper recursively prints all paths through the ratings matrix, starting
// at column col.
static void PrintMatrixPaths(int col, int dim,
const MATRIX& ratings,
int length, const BLOB_CHOICE** blob_choices,
const UNICHARSET& unicharset,
const char *label, FILE *output_file) {
for (int row = col; row < dim && row - col < ratings.bandwidth(); ++row) {
if (ratings.get(col, row) != NOT_CLASSIFIED) {
BLOB_CHOICE_IT bc_it(ratings.get(col, row));
for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
blob_choices[length] = bc_it.data();
if (row + 1 < dim) {
PrintMatrixPaths(row + 1, dim, ratings, length + 1, blob_choices,
unicharset, label, output_file);
}
else {
PrintPath(length + 1, blob_choices, unicharset, label, output_file);
}
}
}
}
}
// Runs classify_word_pass1() on the current word. Outputs Tesseract's
// raw choice as a result of the classification. For words labeled with a
// single unichar also outputs all alternatives from blob_choices of the
// best choice.
void Tesseract::ambigs_classify_and_output(const char *label,
PAGE_RES_IT* pr_it,
FILE *output_file) {
// Classify word.
fflush(stdout);
WordData word_data(*pr_it);
SetupWordPassN(1, &word_data);
classify_word_and_language(1, pr_it, &word_data);
WERD_RES* werd_res = word_data.word;
WERD_CHOICE *best_choice = werd_res->best_choice;
ASSERT_HOST(best_choice != NULL);
// Compute the number of unichars in the label.
GenericVector<UNICHAR_ID> encoding;
if (!unicharset.encode_string(label, true, &encoding, NULL, NULL)) {
tprintf("Not outputting illegal unichar %s\n", label);
return;
}
// Dump all paths through the ratings matrix (which is normally small).
int dim = werd_res->ratings->dimension();
const BLOB_CHOICE** blob_choices = new const BLOB_CHOICE*[dim];
PrintMatrixPaths(0, dim, *werd_res->ratings, 0, blob_choices,
unicharset, label, output_file);
delete[] blob_choices;
}
} // namespace tesseract

View File

@ -0,0 +1,798 @@
/**********************************************************************
* File: reject.cpp (Formerly reject.c)
* Description: Rejection functions used in tessedit
* Author: Phil Cheatle
* Created: Wed Sep 23 16:50:21 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifdef _MSC_VER
#pragma warning(disable:4244) // Conversion warnings
#pragma warning(disable:4305) // int/float warnings
#endif
#include "tessvars.h"
#ifdef __UNIX__
#include <assert.h>
#include <errno.h>
#endif
#include "scanutils.h"
#include <ctype.h>
#include <string.h>
#include "genericvector.h"
#include "reject.h"
#include "control.h"
#include "docqual.h"
#include "globaloc.h" // For err_exit.
#include "globals.h"
#include "helpers.h"
#include "tesseractclass.h"
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
CLISTIZEH(STRING) CLISTIZE(STRING)
/*************************************************************************
* set_done()
*
* Set the done flag based on the word acceptability criteria
*************************************************************************/
namespace tesseract {
void Tesseract::set_done(WERD_RES *word, inT16 pass) {
word->done = word->tess_accepted &&
(strchr(word->best_choice->unichar_string().string(), ' ') == NULL);
bool word_is_ambig = word->best_choice->dangerous_ambig_found();
bool word_from_dict = word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
word->best_choice->permuter() == FREQ_DAWG_PERM ||
word->best_choice->permuter() == USER_DAWG_PERM;
if (word->done && (pass == 1) && (!word_from_dict || word_is_ambig) &&
one_ell_conflict(word, FALSE)) {
if (tessedit_rejection_debug) tprintf("one_ell_conflict detected\n");
word->done = FALSE;
}
if (word->done && ((!word_from_dict &&
word->best_choice->permuter() != NUMBER_PERM) || word_is_ambig)) {
if (tessedit_rejection_debug) tprintf("non-dict or ambig word detected\n");
word->done = FALSE;
}
if (tessedit_rejection_debug) {
tprintf("set_done(): done=%d\n", word->done);
word->best_choice->print("");
}
}
/*************************************************************************
* make_reject_map()
*
 * Sets the done flag to indicate whether the result is acceptable.
*
* Sets a reject map for the word.
*************************************************************************/
void Tesseract::make_reject_map(WERD_RES *word, ROW *row, inT16 pass) {
int i;
int offset;
flip_0O(word);
check_debug_pt(word, -1); // For trap only
set_done(word, pass); // Set acceptance
word->reject_map.initialise(word->best_choice->unichar_lengths().length());
reject_blanks(word);
/*
 0: Ray's original heuristic - the baseline
*/
if (tessedit_reject_mode == 0) {
if (!word->done)
reject_poor_matches(word);
}
else if (tessedit_reject_mode == 5) {
/*
5: Reject I/1/l from words where there is no strong contextual confirmation;
the whole of any unacceptable words (incl PERM rej of dubious 1/I/ls);
and the whole of any words which are very small
*/
if (kBlnXHeight / word->denorm.y_scale() <= min_sane_x_ht_pixels) {
word->reject_map.rej_word_small_xht();
}
else {
one_ell_conflict(word, TRUE);
/*
Originally the code here just used the done flag. Now I have duplicated
and unpacked the conditions for setting the done flag so that each
mechanism can be turned on or off independently. This works WITHOUT
affecting the done flag setting.
*/
if (rej_use_tess_accepted && !word->tess_accepted)
word->reject_map.rej_word_not_tess_accepted();
if (rej_use_tess_blanks &&
(strchr(word->best_choice->unichar_string().string(), ' ') != NULL))
word->reject_map.rej_word_contains_blanks();
WERD_CHOICE* best_choice = word->best_choice;
if (rej_use_good_perm) {
if ((best_choice->permuter() == SYSTEM_DAWG_PERM ||
best_choice->permuter() == FREQ_DAWG_PERM ||
best_choice->permuter() == USER_DAWG_PERM) &&
(!rej_use_sensible_wd ||
acceptable_word_string(*word->uch_set,
best_choice->unichar_string().string(),
best_choice->unichar_lengths().string()) !=
AC_UNACCEPTABLE)) {
// PASSED TEST
}
else if (best_choice->permuter() == NUMBER_PERM) {
if (rej_alphas_in_number_perm) {
for (i = 0, offset = 0;
best_choice->unichar_string()[offset] != '\0';
offset += best_choice->unichar_lengths()[i++]) {
if (word->reject_map[i].accepted() &&
word->uch_set->get_isalpha(
best_choice->unichar_string().string() + offset,
best_choice->unichar_lengths()[i]))
word->reject_map[i].setrej_bad_permuter();
// rej alpha
}
}
}
else {
word->reject_map.rej_word_bad_permuter();
}
}
/* Ambig word rejection was here once !!*/
}
}
else {
tprintf("BAD tessedit_reject_mode\n");
err_exit();
}
if (tessedit_image_border > -1)
reject_edge_blobs(word);
check_debug_pt(word, 10);
if (tessedit_rejection_debug) {
tprintf("Permuter Type = %d\n", word->best_choice->permuter());
tprintf("Certainty: %f Rating: %f\n",
word->best_choice->certainty(), word->best_choice->rating());
tprintf("Dict word: %d\n", dict_word(*(word->best_choice)));
}
flip_hyphens(word);
check_debug_pt(word, 20);
}
} // namespace tesseract
void reject_blanks(WERD_RES *word) {
inT16 i;
inT16 offset;
for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
offset += word->best_choice->unichar_lengths()[i], i += 1) {
if (word->best_choice->unichar_string()[offset] == ' ')
//rej unrecognised blobs
word->reject_map[i].setrej_tess_failure();
}
}
namespace tesseract {
void Tesseract::reject_I_1_L(WERD_RES *word) {
inT16 i;
inT16 offset;
for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
offset += word->best_choice->unichar_lengths()[i], i += 1) {
if (STRING(conflict_set_I_l_1).
contains(word->best_choice->unichar_string()[offset])) {
//rej 1Il conflict
word->reject_map[i].setrej_1Il_conflict();
}
}
}
} // namespace tesseract
void reject_poor_matches(WERD_RES *word) {
float threshold = compute_reject_threshold(word->best_choice);
for (int i = 0; i < word->best_choice->length(); ++i) {
if (word->best_choice->unichar_id(i) == UNICHAR_SPACE)
word->reject_map[i].setrej_tess_failure();
else if (word->best_choice->certainty(i) < threshold)
word->reject_map[i].setrej_poor_match();
}
}
/**********************************************************************
* compute_reject_threshold
*
* Set a rejection threshold for this word.
* Initially this is a trivial function which looks for the largest
* gap in the certainty value.
**********************************************************************/
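// Illustrative worked example (hypothetical numbers, not from the original
// source): for blob certainties {-0.5, -1.0, -6.0, -0.8} the sorted values are
// {-6.0, -1.0, -0.8, -0.5}; the largest gap (5.0) starts at -6.0, so the
// threshold becomes -6.0 + 5.0 / 2 = -3.5, and only the blob with certainty
// -6.0 falls below it and is rejected by reject_poor_matches().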
float compute_reject_threshold(WERD_CHOICE* word) {
float threshold; // rejection threshold
float bestgap = 0.0f; // biggest gap
float gapstart; // bottom of gap
// super iterator
BLOB_CHOICE_IT choice_it; // real iterator
int blob_count = word->length();
GenericVector<float> ratings;
ratings.resize_no_init(blob_count);
for (int i = 0; i < blob_count; ++i) {
ratings[i] = word->certainty(i);
}
ratings.sort();
gapstart = ratings[0] - 1; // all reject if none better
if (blob_count >= 3) {
for (int index = 0; index < blob_count - 1; index++) {
if (ratings[index + 1] - ratings[index] > bestgap) {
bestgap = ratings[index + 1] - ratings[index];
// find biggest
gapstart = ratings[index];
}
}
}
threshold = gapstart + bestgap / 2;
return threshold;
}
/*************************************************************************
* reject_edge_blobs()
*
* If the word is perilously close to the edge of the image, reject those blobs
* in the word which are too close to the edge as they could be clipped.
*************************************************************************/
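// Hypothetical example (values are illustrative only): with
// tessedit_image_border == 2 and a 1000x800 image, a blob whose bounding box
// has left() == 1 (or right() == 998, bottom() == 1, top() == 798) satisfies
// one of the tests below and is marked with setrej_edge_char().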
namespace tesseract {
void Tesseract::reject_edge_blobs(WERD_RES *word) {
TBOX word_box = word->word->bounding_box();
// Use the box_word as it is already denormed back to image coordinates.
int blobcount = word->box_word->length();
if (word_box.left() < tessedit_image_border ||
word_box.bottom() < tessedit_image_border ||
word_box.right() + tessedit_image_border > ImageWidth() - 1 ||
word_box.top() + tessedit_image_border > ImageHeight() - 1) {
ASSERT_HOST(word->reject_map.length() == blobcount);
for (int blobindex = 0; blobindex < blobcount; blobindex++) {
TBOX blob_box = word->box_word->BlobBox(blobindex);
if (blob_box.left() < tessedit_image_border ||
blob_box.bottom() < tessedit_image_border ||
blob_box.right() + tessedit_image_border > ImageWidth() - 1 ||
blob_box.top() + tessedit_image_border > ImageHeight() - 1) {
word->reject_map[blobindex].setrej_edge_char();
// Close to edge
}
}
}
}
/**********************************************************************
* one_ell_conflict()
*
* Identify words where there is a potential I/l/1 error.
* - A bundle of contextual heuristics!
**********************************************************************/
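// Hypothetical example: for a dictionary-accepted word "Ill" the code below
// temporarily flips the leading 'I' to 'l' and asks the dictionary about
// "lll"; if that alternative is not a dictionary word the original reading is
// trusted and FALSE is returned, otherwise the ambiguous leading character is
// rejected as a 1Il conflict.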
BOOL8 Tesseract::one_ell_conflict(WERD_RES *word_res, BOOL8 update_map) {
const char *word;
const char *lengths;
inT16 word_len; //its length
inT16 first_alphanum_index_;
inT16 first_alphanum_offset_;
inT16 i;
inT16 offset;
BOOL8 non_conflict_set_char; //non conf set a/n?
BOOL8 conflict = FALSE;
BOOL8 allow_1s;
ACCEPTABLE_WERD_TYPE word_type;
BOOL8 dict_perm_type;
BOOL8 dict_word_ok;
int dict_word_type;
word = word_res->best_choice->unichar_string().string();
lengths = word_res->best_choice->unichar_lengths().string();
word_len = strlen(lengths);
/*
If there are no occurrences of the conflict set characters then the word
is OK.
*/
if (strpbrk(word, conflict_set_I_l_1.string()) == NULL)
return FALSE;
/*
There is a conflict if there are NO other (confirmed) alphanumerics apart
from those in the conflict set.
*/
for (i = 0, offset = 0, non_conflict_set_char = FALSE;
(i < word_len) && !non_conflict_set_char; offset += lengths[i++])
non_conflict_set_char =
(word_res->uch_set->get_isalpha(word + offset, lengths[i]) ||
word_res->uch_set->get_isdigit(word + offset, lengths[i])) &&
!STRING(conflict_set_I_l_1).contains(word[offset]);
if (!non_conflict_set_char) {
if (update_map)
reject_I_1_L(word_res);
return TRUE;
}
/*
If the word is accepted by a dawg permuter, and the first alpha character
is "I" or "l", check to see if the alternative is also a dawg word. If it
is, then there is a potential error otherwise the word is ok.
*/
dict_perm_type = (word_res->best_choice->permuter() == SYSTEM_DAWG_PERM) ||
(word_res->best_choice->permuter() == USER_DAWG_PERM) ||
(rej_trust_doc_dawg &&
(word_res->best_choice->permuter() == DOC_DAWG_PERM)) ||
(word_res->best_choice->permuter() == FREQ_DAWG_PERM);
dict_word_type = dict_word(*(word_res->best_choice));
dict_word_ok = (dict_word_type > 0) &&
(rej_trust_doc_dawg || (dict_word_type != DOC_DAWG_PERM));
if ((rej_1Il_use_dict_word && dict_word_ok) ||
(rej_1Il_trust_permuter_type && dict_perm_type) ||
(dict_perm_type && dict_word_ok)) {
first_alphanum_index_ = first_alphanum_index(word, lengths);
first_alphanum_offset_ = first_alphanum_offset(word, lengths);
if (lengths[first_alphanum_index_] == 1 &&
word[first_alphanum_offset_] == 'I') {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
if (safe_dict_word(word_res) > 0) {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
if (update_map)
word_res->reject_map[first_alphanum_index_].
setrej_1Il_conflict();
return TRUE;
}
else {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
return FALSE;
}
}
if (lengths[first_alphanum_index_] == 1 &&
word[first_alphanum_offset_] == 'l') {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
if (safe_dict_word(word_res) > 0) {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
if (update_map)
word_res->reject_map[first_alphanum_index_].
setrej_1Il_conflict();
return TRUE;
}
else {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
return FALSE;
}
}
return FALSE;
}
/*
NEW 1Il code. The old code relied on permuter types too much. In fact,
tess will use TOP_CHOICE permute for good things like "palette".
In this code the string is examined independently to see if it looks like
a well formed word.
*/
/*
REGARDLESS OF PERMUTER, see if flipping a leading I/l generates a
dictionary word.
*/
first_alphanum_index_ = first_alphanum_index(word, lengths);
first_alphanum_offset_ = first_alphanum_offset(word, lengths);
if (lengths[first_alphanum_index_] == 1 &&
word[first_alphanum_offset_] == 'l') {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
if (safe_dict_word(word_res) > 0)
return FALSE;
else
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
}
else if (lengths[first_alphanum_index_] == 1 &&
word[first_alphanum_offset_] == 'I') {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
if (safe_dict_word(word_res) > 0)
return FALSE;
else
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
}
/*
For strings containing digits:
If there are no alphas OR the numeric permuter liked the word,
reject any non 1 conflict chs
Else reject all conflict chs
*/
if (word_contains_non_1_digit(word, lengths)) {
allow_1s = (alpha_count(word, lengths) == 0) ||
(word_res->best_choice->permuter() == NUMBER_PERM);
inT16 offset;
conflict = FALSE;
for (i = 0, offset = 0; word[offset] != '\0';
offset += word_res->best_choice->unichar_lengths()[i++]) {
if ((!allow_1s || (word[offset] != '1')) &&
STRING(conflict_set_I_l_1).contains(word[offset])) {
if (update_map)
word_res->reject_map[i].setrej_1Il_conflict();
conflict = TRUE;
}
}
return conflict;
}
/*
For anything else. See if it conforms to an acceptable word type. If so,
treat accordingly.
*/
word_type = acceptable_word_string(*word_res->uch_set, word, lengths);
if ((word_type == AC_LOWER_CASE) || (word_type == AC_INITIAL_CAP)) {
first_alphanum_index_ = first_alphanum_index(word, lengths);
first_alphanum_offset_ = first_alphanum_offset(word, lengths);
if (STRING(conflict_set_I_l_1).contains(word[first_alphanum_offset_])) {
if (update_map)
word_res->reject_map[first_alphanum_index_].
setrej_1Il_conflict();
return TRUE;
}
else
return FALSE;
}
else if (word_type == AC_UPPER_CASE) {
return FALSE;
}
else {
if (update_map)
reject_I_1_L(word_res);
return TRUE;
}
}
inT16 Tesseract::first_alphanum_index(const char *word,
const char *word_lengths) {
inT16 i;
inT16 offset;
for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) {
if (unicharset.get_isalpha(word + offset, word_lengths[i]) ||
unicharset.get_isdigit(word + offset, word_lengths[i]))
return i;
}
return -1;
}
inT16 Tesseract::first_alphanum_offset(const char *word,
const char *word_lengths) {
inT16 i;
inT16 offset;
for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) {
if (unicharset.get_isalpha(word + offset, word_lengths[i]) ||
unicharset.get_isdigit(word + offset, word_lengths[i]))
return offset;
}
return -1;
}
inT16 Tesseract::alpha_count(const char *word,
const char *word_lengths) {
inT16 i;
inT16 offset;
inT16 count = 0;
for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) {
if (unicharset.get_isalpha(word + offset, word_lengths[i]))
count++;
}
return count;
}
BOOL8 Tesseract::word_contains_non_1_digit(const char *word,
const char *word_lengths) {
inT16 i;
inT16 offset;
for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) {
if (unicharset.get_isdigit(word + offset, word_lengths[i]) &&
(word_lengths[i] != 1 || word[offset] != '1'))
return TRUE;
}
return FALSE;
}
/*************************************************************************
* dont_allow_1Il()
* Don't unreject LONE accepted 1Il conflict set chars
*************************************************************************/
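// Hypothetical example: if every accepted character of a word is in the
// conflict set (say all three characters of "Ill" are accepted), each of them
// is re-rejected with setrej_postNN_1Il() below; if any other alphanumeric
// (e.g. the 'B' of "Bill") is accepted, the function returns early and the
// word is left alone.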
void Tesseract::dont_allow_1Il(WERD_RES *word) {
int i = 0;
int offset;
int word_len = word->reject_map.length();
const char *s = word->best_choice->unichar_string().string();
const char *lengths = word->best_choice->unichar_lengths().string();
BOOL8 accepted_1Il = FALSE;
for (i = 0, offset = 0; i < word_len;
offset += word->best_choice->unichar_lengths()[i++]) {
if (word->reject_map[i].accepted()) {
if (STRING(conflict_set_I_l_1).contains(s[offset])) {
accepted_1Il = TRUE;
}
else {
if (word->uch_set->get_isalpha(s + offset, lengths[i]) ||
word->uch_set->get_isdigit(s + offset, lengths[i]))
return; // >=1 non 1Il ch accepted
}
}
}
if (!accepted_1Il)
return; //Nothing to worry about
for (i = 0, offset = 0; i < word_len;
offset += word->best_choice->unichar_lengths()[i++]) {
if (STRING(conflict_set_I_l_1).contains(s[offset]) &&
word->reject_map[i].accepted())
word->reject_map[i].setrej_postNN_1Il();
}
}
inT16 Tesseract::count_alphanums(WERD_RES *word_res) {
int count = 0;
const WERD_CHOICE *best_choice = word_res->best_choice;
for (int i = 0; i < word_res->reject_map.length(); ++i) {
if ((word_res->reject_map[i].accepted()) &&
(word_res->uch_set->get_isalpha(best_choice->unichar_id(i)) ||
word_res->uch_set->get_isdigit(best_choice->unichar_id(i)))) {
count++;
}
}
return count;
}
// reject all if most rejected.
void Tesseract::reject_mostly_rejects(WERD_RES *word) {
/* Reject the whole of the word if the fraction of rejects exceeds a limit */
if ((float)word->reject_map.reject_count() / word->reject_map.length() >=
rej_whole_of_mostly_reject_word_fract)
word->reject_map.rej_word_mostly_rej();
}
BOOL8 Tesseract::repeated_nonalphanum_wd(WERD_RES *word, ROW *row) {
inT16 char_quality;
inT16 accepted_char_quality;
if (word->best_choice->unichar_lengths().length() <= 1)
return FALSE;
if (!STRING(ok_repeated_ch_non_alphanum_wds).
contains(word->best_choice->unichar_string()[0]))
return FALSE;
UNICHAR_ID uch_id = word->best_choice->unichar_id(0);
for (int i = 1; i < word->best_choice->length(); ++i) {
if (word->best_choice->unichar_id(i) != uch_id) return FALSE;
}
word_char_quality(word, row, &char_quality, &accepted_char_quality);
if ((word->best_choice->unichar_lengths().length() == char_quality) &&
(char_quality == accepted_char_quality))
return TRUE;
else
return FALSE;
}
inT16 Tesseract::safe_dict_word(const WERD_RES *werd_res) {
const WERD_CHOICE &word = *werd_res->best_choice;
int dict_word_type = werd_res->tesseract->dict_word(word);
return dict_word_type == DOC_DAWG_PERM ? 0 : dict_word_type;
}
// Note: After running this function word_res->ratings
// might not contain the right BLOB_CHOICE corresponding to each character
// in word_res->best_choice.
void Tesseract::flip_hyphens(WERD_RES *word_res) {
WERD_CHOICE *best_choice = word_res->best_choice;
int i;
int prev_right = -9999;
int next_left;
TBOX out_box;
float aspect_ratio;
if (tessedit_lower_flip_hyphen <= 1)
return;
int num_blobs = word_res->rebuild_word->NumBlobs();
UNICHAR_ID unichar_dash = word_res->uch_set->unichar_to_id("-");
for (i = 0; i < best_choice->length() && i < num_blobs; ++i) {
TBLOB* blob = word_res->rebuild_word->blobs[i];
out_box = blob->bounding_box();
if (i + 1 == num_blobs)
next_left = 9999;
else
next_left = word_res->rebuild_word->blobs[i + 1]->bounding_box().left();
// Don't touch small or touching blobs - it is too dangerous.
if ((out_box.width() > 8 * word_res->denorm.x_scale()) &&
(out_box.left() > prev_right) && (out_box.right() < next_left)) {
aspect_ratio = out_box.width() / (float)out_box.height();
if (word_res->uch_set->eq(best_choice->unichar_id(i), ".")) {
if (aspect_ratio >= tessedit_upper_flip_hyphen &&
word_res->uch_set->contains_unichar_id(unichar_dash) &&
word_res->uch_set->get_enabled(unichar_dash)) {
/* Certain HYPHEN */
best_choice->set_unichar_id(unichar_dash, i);
if (word_res->reject_map[i].rejected())
word_res->reject_map[i].setrej_hyphen_accept();
}
if ((aspect_ratio > tessedit_lower_flip_hyphen) &&
word_res->reject_map[i].accepted())
//Suspected HYPHEN
word_res->reject_map[i].setrej_hyphen();
}
else if (best_choice->unichar_id(i) == unichar_dash) {
if ((aspect_ratio >= tessedit_upper_flip_hyphen) &&
(word_res->reject_map[i].rejected()))
word_res->reject_map[i].setrej_hyphen_accept();
//Certain HYPHEN
if ((aspect_ratio <= tessedit_lower_flip_hyphen) &&
(word_res->reject_map[i].accepted()))
//Suspected HYPHEN
word_res->reject_map[i].setrej_hyphen();
}
}
prev_right = out_box.right();
}
}
// Note: After running this function word_res->ratings
// might not contain the right BLOB_CHOICE corresponding to each character
// in word_res->best_choice.
void Tesseract::flip_0O(WERD_RES *word_res) {
WERD_CHOICE *best_choice = word_res->best_choice;
int i;
TBOX out_box;
if (!tessedit_flip_0O)
return;
int num_blobs = word_res->rebuild_word->NumBlobs();
for (i = 0; i < best_choice->length() && i < num_blobs; ++i) {
TBLOB* blob = word_res->rebuild_word->blobs[i];
if (word_res->uch_set->get_isupper(best_choice->unichar_id(i)) ||
word_res->uch_set->get_isdigit(best_choice->unichar_id(i))) {
out_box = blob->bounding_box();
if ((out_box.top() < kBlnBaselineOffset + kBlnXHeight) ||
(out_box.bottom() > kBlnBaselineOffset + kBlnXHeight / 4))
return; //Beware words with sub/superscripts
}
}
UNICHAR_ID unichar_0 = word_res->uch_set->unichar_to_id("0");
UNICHAR_ID unichar_O = word_res->uch_set->unichar_to_id("O");
if (unichar_0 == INVALID_UNICHAR_ID ||
!word_res->uch_set->get_enabled(unichar_0) ||
unichar_O == INVALID_UNICHAR_ID ||
!word_res->uch_set->get_enabled(unichar_O)) {
return; // 0 or O are not present/enabled in unicharset
}
for (i = 1; i < best_choice->length(); ++i) {
if (best_choice->unichar_id(i) == unichar_0 ||
best_choice->unichar_id(i) == unichar_O) {
/* A0A */
if ((i + 1) < best_choice->length() &&
non_O_upper(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
non_O_upper(*word_res->uch_set, best_choice->unichar_id(i + 1))) {
best_choice->set_unichar_id(unichar_O, i);
}
/* A00A */
if (non_O_upper(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
(i + 1) < best_choice->length() &&
(best_choice->unichar_id(i + 1) == unichar_0 ||
best_choice->unichar_id(i + 1) == unichar_O) &&
(i + 2) < best_choice->length() &&
non_O_upper(*word_res->uch_set, best_choice->unichar_id(i + 2))) {
best_choice->set_unichar_id(unichar_O, i);
i++;
}
/* AA0<non digit or end of word> */
if ((i > 1) &&
non_O_upper(*word_res->uch_set, best_choice->unichar_id(i - 2)) &&
non_O_upper(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
(((i + 1) < best_choice->length() &&
!word_res->uch_set->get_isdigit(best_choice->unichar_id(i + 1)) &&
!word_res->uch_set->eq(best_choice->unichar_id(i + 1), "l") &&
!word_res->uch_set->eq(best_choice->unichar_id(i + 1), "I")) ||
(i == best_choice->length() - 1))) {
best_choice->set_unichar_id(unichar_O, i);
}
/* 9O9 */
if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
(i + 1) < best_choice->length() &&
non_0_digit(*word_res->uch_set, best_choice->unichar_id(i + 1))) {
best_choice->set_unichar_id(unichar_0, i);
}
/* 9OOO */
if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
(i + 2) < best_choice->length() &&
(best_choice->unichar_id(i + 1) == unichar_0 ||
best_choice->unichar_id(i + 1) == unichar_O) &&
(best_choice->unichar_id(i + 2) == unichar_0 ||
best_choice->unichar_id(i + 2) == unichar_O)) {
best_choice->set_unichar_id(unichar_0, i);
best_choice->set_unichar_id(unichar_0, i + 1);
best_choice->set_unichar_id(unichar_0, i + 2);
i += 2;
}
/* 9OO<non upper> */
if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
(i + 2) < best_choice->length() &&
(best_choice->unichar_id(i + 1) == unichar_0 ||
best_choice->unichar_id(i + 1) == unichar_O) &&
!word_res->uch_set->get_isupper(best_choice->unichar_id(i + 2))) {
best_choice->set_unichar_id(unichar_0, i);
best_choice->set_unichar_id(unichar_0, i + 1);
i++;
}
/* 9O<non upper> */
if (non_0_digit(*word_res->uch_set, best_choice->unichar_id(i - 1)) &&
(i + 1) < best_choice->length() &&
!word_res->uch_set->get_isupper(best_choice->unichar_id(i + 1))) {
best_choice->set_unichar_id(unichar_0, i);
}
/* 9[.,]OOO.. */
if ((i > 1) &&
(word_res->uch_set->eq(best_choice->unichar_id(i - 1), ".") ||
word_res->uch_set->eq(best_choice->unichar_id(i - 1), ",")) &&
(word_res->uch_set->get_isdigit(best_choice->unichar_id(i - 2)) ||
best_choice->unichar_id(i - 2) == unichar_O)) {
if (best_choice->unichar_id(i - 2) == unichar_O) {
best_choice->set_unichar_id(unichar_0, i - 2);
}
while (i < best_choice->length() &&
(best_choice->unichar_id(i) == unichar_O ||
best_choice->unichar_id(i) == unichar_0)) {
best_choice->set_unichar_id(unichar_0, i);
i++;
}
i--;
}
}
}
}
BOOL8 Tesseract::non_O_upper(const UNICHARSET& ch_set, UNICHAR_ID unichar_id) {
return ch_set.get_isupper(unichar_id) && !ch_set.eq(unichar_id, "O");
}
BOOL8 Tesseract::non_0_digit(const UNICHARSET& ch_set, UNICHAR_ID unichar_id) {
return ch_set.get_isdigit(unichar_id) && !ch_set.eq(unichar_id, "0");
}
} // namespace tesseract

View File

@ -0,0 +1,34 @@
/**********************************************************************
* File: reject.h (Formerly reject.h)
* Description: Rejection functions used in tessedit
* Author: Phil Cheatle
* Created: Wed Sep 23 16:50:21 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef REJECT_H
#define REJECT_H
#include "params.h"
#include "pageres.h"
void reject_blanks(WERD_RES *word);
void reject_poor_matches(WERD_RES *word);
float compute_reject_threshold(WERD_CHOICE* word);
BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths);
void dont_allow_1Il(WERD_RES *word);
void flip_hyphens(WERD_RES *word);
void flip_0O(WERD_RES *word);
BOOL8 non_0_digit(const char* str, int length);
#endif

View File

@ -0,0 +1,683 @@
///////////////////////////////////////////////////////////////////////
// File: resultiterator.cpp
// Description: Iterator for tesseract results that is capable of
// iterating in proper reading order over Bi Directional
// (e.g. mixed Hebrew and English) text.
// Author: David Eger
// Created: Fri May 27 13:58:06 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "resultiterator.h"
#include "allheaders.h"
#include "pageres.h"
#include "strngs.h"
#include "tesseractclass.h"
#include "unicharset.h"
#include "unicodes.h"
namespace tesseract {
ResultIterator::ResultIterator(const LTRResultIterator &resit)
: LTRResultIterator(resit) {
in_minor_direction_ = false;
at_beginning_of_minor_run_ = false;
preserve_interword_spaces_ = false;
BoolParam *p = ParamUtils::FindParam<BoolParam>(
"preserve_interword_spaces", GlobalParams()->bool_params,
tesseract_->params()->bool_params);
if (p != NULL) preserve_interword_spaces_ = (bool)(*p);
current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
MoveToLogicalStartOfTextline();
}
ResultIterator *ResultIterator::StartOfParagraph(
const LTRResultIterator &resit) {
return new ResultIterator(resit);
}
bool ResultIterator::ParagraphIsLtr() const {
return current_paragraph_is_ltr_;
}
bool ResultIterator::CurrentParagraphIsLtr() const {
if (!it_->word())
return true; // doesn't matter.
LTRResultIterator it(*this);
it.RestartParagraph();
// Try to figure out the ltr-ness of the paragraph. The rules below
// make more sense in the context of a difficult paragraph example.
// Here we denote {ltr characters, RTL CHARACTERS}:
//
// "don't go in there!" DAIS EH
// EHT OTNI DEPMUJ FELSMIH NEHT DNA
// .GNIDLIUB GNINRUB
//
// On the first line, the left-most word is LTR and the rightmost word
// is RTL. Thus, we are better off taking the majority direction for
// the whole paragraph contents. So instead of "the leftmost word is LTR"
// indicating an LTR paragraph, we use a heuristic about what RTL paragraphs
// would not do: Typically an RTL paragraph would *not* start with an LTR
// word. So our heuristics are as follows:
//
// (1) If the first text line has an RTL word in the left-most position
// it is RTL.
// (2) If the first text line has an LTR word in the right-most position
// it is LTR.
// (3) If neither of the above is true, take the majority count for the
// paragraph -- if there are more rtl words, it is RTL. If there
// are more LTR words, it's LTR.
bool leftmost_rtl = it.WordDirection() == DIR_RIGHT_TO_LEFT;
bool rightmost_ltr = it.WordDirection() == DIR_LEFT_TO_RIGHT;
int num_ltr, num_rtl;
num_rtl = leftmost_rtl ? 1 : 0;
num_ltr = (it.WordDirection() == DIR_LEFT_TO_RIGHT) ? 1 : 0;
for (it.Next(RIL_WORD);
!it.Empty(RIL_WORD) && !it.IsAtBeginningOf(RIL_TEXTLINE);
it.Next(RIL_WORD)) {
StrongScriptDirection dir = it.WordDirection();
rightmost_ltr = (dir == DIR_LEFT_TO_RIGHT);
num_rtl += (dir == DIR_RIGHT_TO_LEFT) ? 1 : 0;
num_ltr += rightmost_ltr ? 1 : 0;
}
if (leftmost_rtl)
return false;
if (rightmost_ltr)
return true;
// First line is ambiguous. Take statistics on the whole paragraph.
if (!it.Empty(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA)) do {
StrongScriptDirection dir = it.WordDirection();
num_rtl += (dir == DIR_RIGHT_TO_LEFT) ? 1 : 0;
num_ltr += (dir == DIR_LEFT_TO_RIGHT) ? 1 : 0;
} while (it.Next(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA));
return num_ltr >= num_rtl;
}
const int ResultIterator::kMinorRunStart = -1;
const int ResultIterator::kMinorRunEnd = -2;
const int ResultIterator::kComplexWord = -3;
void ResultIterator::CalculateBlobOrder(
GenericVector<int> *blob_indices) const {
bool context_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_;
blob_indices->clear();
if (Empty(RIL_WORD)) return;
if (context_is_ltr || it_->word()->UnicharsInReadingOrder()) {
// Easy! just return the blobs in order;
for (int i = 0; i < word_length_; i++)
blob_indices->push_back(i);
return;
}
// The blobs are in left-to-right order, but the current reading context
// is right-to-left.
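// Hypothetical example: if the symbol directions of the word come out as
// {RTL, RTL, EuropeanNumber, EuropeanNumber} (letters followed by a two-digit
// number), the algorithm below produces blob_indices {2, 3, 0, 1}: the digits
// keep their visual left-to-right order while the letters are emitted
// right-to-left.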
const int U_LTR = UNICHARSET::U_LEFT_TO_RIGHT;
const int U_RTL = UNICHARSET::U_RIGHT_TO_LEFT;
const int U_EURO_NUM = UNICHARSET::U_EUROPEAN_NUMBER;
const int U_EURO_NUM_SEP = UNICHARSET::U_EUROPEAN_NUMBER_SEPARATOR;
const int U_EURO_NUM_TERM = UNICHARSET::U_EUROPEAN_NUMBER_TERMINATOR;
const int U_COMMON_NUM_SEP = UNICHARSET::U_COMMON_NUMBER_SEPARATOR;
const int U_OTHER_NEUTRAL = UNICHARSET::U_OTHER_NEUTRAL;
// Step 1: Scan for and mark European Number sequences
// [:ET:]*[:EN:]+(([:ES:]|[:CS:])?[:EN:]+)*[:ET:]*
GenericVector<int> letter_types;
for (int i = 0; i < word_length_; i++) {
letter_types.push_back(it_->word()->SymbolDirection(i));
}
// Convert a single separator sandwiched between two EN's into an EN.
for (int i = 0; i + 2 < word_length_; i++) {
if (letter_types[i] == U_EURO_NUM && letter_types[i + 2] == U_EURO_NUM &&
(letter_types[i + 1] == U_EURO_NUM_SEP ||
letter_types[i + 1] == U_COMMON_NUM_SEP)) {
letter_types[i + 1] = U_EURO_NUM;
}
}
// Scan for sequences of European Number Terminators around ENs and convert
// them to ENs.
for (int i = 0; i < word_length_; i++) {
if (letter_types[i] == U_EURO_NUM_TERM) {
int j = i + 1;
while (j < word_length_ && letter_types[j] == U_EURO_NUM_TERM) { j++; }
if (j < word_length_ && letter_types[j] == U_EURO_NUM) {
// The sequence [i..j] should be converted to all European Numbers.
for (int k = i; k < j; k++) letter_types[k] = U_EURO_NUM;
}
j = i - 1;
while (j > -1 && letter_types[j] == U_EURO_NUM_TERM) { j--; }
if (j > -1 && letter_types[j] == U_EURO_NUM) {
// The sequence [j..i] should be converted to all European Numbers.
for (int k = j; k <= i; k++) letter_types[k] = U_EURO_NUM;
}
}
}
// Step 2: Convert all remaining types to either L or R.
// Sequences ([:L:]|[:EN:])+ (([:CS:]|[:ON:])+ ([:L:]|[:EN:])+)* -> L.
// All other are R.
for (int i = 0; i < word_length_;) {
int ti = letter_types[i];
if (ti == U_LTR || ti == U_EURO_NUM) {
// Left to right sequence; scan to the end of it.
int last_good = i;
for (int j = i + 1; j < word_length_; j++) {
int tj = letter_types[j];
if (tj == U_LTR || tj == U_EURO_NUM) {
last_good = j;
}
else if (tj == U_COMMON_NUM_SEP || tj == U_OTHER_NEUTRAL) {
// do nothing.
}
else {
break;
}
}
// [i..last_good] is the L sequence
for (int k = i; k <= last_good; k++) letter_types[k] = U_LTR;
i = last_good + 1;
}
else {
letter_types[i] = U_RTL;
i++;
}
}
// At this point, letter_types is entirely U_LTR or U_RTL.
for (int i = word_length_ - 1; i >= 0;) {
if (letter_types[i] == U_RTL) {
blob_indices->push_back(i);
i--;
}
else {
// left to right sequence. scan to the beginning.
int j = i - 1;
for (; j >= 0 && letter_types[j] != U_RTL; j--) {} // pass
// Now (j, i] is LTR
for (int k = j + 1; k <= i; k++) blob_indices->push_back(k);
i = j;
}
}
ASSERT_HOST(blob_indices->size() == word_length_);
}
static void PrintScriptDirs(const GenericVector<StrongScriptDirection> &dirs) {
for (int i = 0; i < dirs.size(); i++) {
switch (dirs[i]) {
case DIR_NEUTRAL: tprintf("N "); break;
case DIR_LEFT_TO_RIGHT: tprintf("L "); break;
case DIR_RIGHT_TO_LEFT: tprintf("R "); break;
case DIR_MIX: tprintf("Z "); break;
default: tprintf("? "); break;
}
}
tprintf("\n");
}
void ResultIterator::CalculateTextlineOrder(
bool paragraph_is_ltr,
const LTRResultIterator &resit,
GenericVectorEqEq<int> *word_indices) const {
GenericVector<StrongScriptDirection> directions;
CalculateTextlineOrder(paragraph_is_ltr, resit, &directions, word_indices);
}
void ResultIterator::CalculateTextlineOrder(
bool paragraph_is_ltr,
const LTRResultIterator &resit,
GenericVector<StrongScriptDirection> *dirs_arg,
GenericVectorEqEq<int> *word_indices) const {
GenericVector<StrongScriptDirection> dirs;
GenericVector<StrongScriptDirection> *directions;
directions = (dirs_arg != NULL) ? dirs_arg : &dirs;
directions->truncate(0);
// A LTRResultIterator goes strictly left-to-right word order.
LTRResultIterator ltr_it(resit);
ltr_it.RestartRow();
if (ltr_it.Empty(RIL_WORD)) return;
do {
directions->push_back(ltr_it.WordDirection());
} while (ltr_it.Next(RIL_WORD) && !ltr_it.IsAtBeginningOf(RIL_TEXTLINE));
word_indices->truncate(0);
CalculateTextlineOrder(paragraph_is_ltr, *directions, word_indices);
}
void ResultIterator::CalculateTextlineOrder(
bool paragraph_is_ltr,
const GenericVector<StrongScriptDirection> &word_dirs,
GenericVectorEqEq<int> *reading_order) {
reading_order->truncate(0);
if (word_dirs.size() == 0) return;
// Take all of the runs of minor direction words and insert them
// in reverse order.
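// Hypothetical example: in an LTR paragraph with word directions
// {LTR, RTL, RTL, LTR}, the reading order produced below is
// {0, kMinorRunStart, 2, 1, kMinorRunEnd, 3}: the RTL run is emitted
// right-to-left and bracketed by the minor-run markers.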
int minor_direction, major_direction, major_step, start, end;
if (paragraph_is_ltr) {
start = 0;
end = word_dirs.size();
major_step = 1;
major_direction = DIR_LEFT_TO_RIGHT;
minor_direction = DIR_RIGHT_TO_LEFT;
}
else {
start = word_dirs.size() - 1;
end = -1;
major_step = -1;
major_direction = DIR_RIGHT_TO_LEFT;
minor_direction = DIR_LEFT_TO_RIGHT;
// Special rule: if there are neutral words at the right most side
// of a line adjacent to a left-to-right word in the middle of the
// line, we interpret the end of the line as a single LTR sequence.
if (word_dirs[start] == DIR_NEUTRAL) {
int neutral_end = start;
while (neutral_end > 0 && word_dirs[neutral_end] == DIR_NEUTRAL) {
neutral_end--;
}
if (neutral_end >= 0 && word_dirs[neutral_end] == DIR_LEFT_TO_RIGHT) {
// LTR followed by neutrals.
// Scan for the beginning of the minor left-to-right run.
int left = neutral_end;
for (int i = left; i >= 0 && word_dirs[i] != DIR_RIGHT_TO_LEFT; i--) {
if (word_dirs[i] == DIR_LEFT_TO_RIGHT) left = i;
}
reading_order->push_back(kMinorRunStart);
for (int i = left; i < word_dirs.size(); i++) {
reading_order->push_back(i);
if (word_dirs[i] == DIR_MIX) reading_order->push_back(kComplexWord);
}
reading_order->push_back(kMinorRunEnd);
start = left - 1;
}
}
}
for (int i = start; i != end;) {
if (word_dirs[i] == minor_direction) {
int j = i;
while (j != end && word_dirs[j] != major_direction)
j += major_step;
if (j == end) j -= major_step;
while (j != i && word_dirs[j] != minor_direction)
j -= major_step;
// [j..i] is a minor direction run.
reading_order->push_back(kMinorRunStart);
for (int k = j; k != i; k -= major_step) {
reading_order->push_back(k);
}
reading_order->push_back(i);
reading_order->push_back(kMinorRunEnd);
i = j + major_step;
}
else {
reading_order->push_back(i);
if (word_dirs[i] == DIR_MIX) reading_order->push_back(kComplexWord);
i += major_step;
}
}
}
int ResultIterator::LTRWordIndex() const {
int this_word_index = 0;
LTRResultIterator textline(*this);
textline.RestartRow();
while (!textline.PositionedAtSameWord(it_)) {
this_word_index++;
textline.Next(RIL_WORD);
}
return this_word_index;
}
void ResultIterator::MoveToLogicalStartOfWord() {
if (word_length_ == 0) {
BeginWord(0);
return;
}
GenericVector<int> blob_order;
CalculateBlobOrder(&blob_order);
if (blob_order.size() == 0 || blob_order[0] == 0) return;
BeginWord(blob_order[0]);
}
bool ResultIterator::IsAtFinalSymbolOfWord() const {
if (!it_->word()) return true;
GenericVector<int> blob_order;
CalculateBlobOrder(&blob_order);
return blob_order.size() == 0 || blob_order.back() == blob_index_;
}
bool ResultIterator::IsAtFirstSymbolOfWord() const {
if (!it_->word()) return true;
GenericVector<int> blob_order;
CalculateBlobOrder(&blob_order);
return blob_order.size() == 0 || blob_order[0] == blob_index_;
}
void ResultIterator::AppendSuffixMarks(STRING *text) const {
if (!it_->word()) return;
bool reading_direction_is_ltr =
current_paragraph_is_ltr_ ^ in_minor_direction_;
// scan forward to see what meta-information the word ordering algorithm
// left us.
// If this word is at the *end* of a minor run, insert the other
// direction's mark; else if this was a complex word, insert the
// current reading order's mark.
GenericVectorEqEq<int> textline_order;
CalculateTextlineOrder(current_paragraph_is_ltr_,
*this, &textline_order);
int this_word_index = LTRWordIndex();
int i = textline_order.get_index(this_word_index);
if (i < 0) return;
int last_non_word_mark = 0;
for (i++; i < textline_order.size() && textline_order[i] < 0; i++) {
last_non_word_mark = textline_order[i];
}
if (last_non_word_mark == kComplexWord) {
*text += reading_direction_is_ltr ? kLRM : kRLM;
}
else if (last_non_word_mark == kMinorRunEnd) {
if (current_paragraph_is_ltr_) {
*text += kLRM;
}
else {
*text += kRLM;
}
}
}
void ResultIterator::MoveToLogicalStartOfTextline() {
GenericVectorEqEq<int> word_indices;
RestartRow();
CalculateTextlineOrder(current_paragraph_is_ltr_,
dynamic_cast<const LTRResultIterator&>(*this),
&word_indices);
int i = 0;
for (; i < word_indices.size() && word_indices[i] < 0; i++) {
if (word_indices[i] == kMinorRunStart) in_minor_direction_ = true;
else if (word_indices[i] == kMinorRunEnd) in_minor_direction_ = false;
}
if (in_minor_direction_) at_beginning_of_minor_run_ = true;
if (i >= word_indices.size()) return;
int first_word_index = word_indices[i];
for (int j = 0; j < first_word_index; j++) {
PageIterator::Next(RIL_WORD);
}
MoveToLogicalStartOfWord();
}
void ResultIterator::Begin() {
LTRResultIterator::Begin();
current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
in_minor_direction_ = false;
at_beginning_of_minor_run_ = false;
MoveToLogicalStartOfTextline();
}
bool ResultIterator::Next(PageIteratorLevel level) {
if (it_->block() == NULL) return false; // already at end!
switch (level) {
case RIL_BLOCK: // explicit fall-through
case RIL_PARA: // explicit fall-through
case RIL_TEXTLINE:
if (!PageIterator::Next(level)) return false;
if (IsWithinFirstTextlineOfParagraph()) {
// if we've advanced to a new paragraph,
// recalculate current_paragraph_is_ltr_
current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
}
in_minor_direction_ = false;
MoveToLogicalStartOfTextline();
return it_->block() != NULL;
case RIL_SYMBOL:
{
GenericVector<int> blob_order;
CalculateBlobOrder(&blob_order);
int next_blob = 0;
while (next_blob < blob_order.size() &&
blob_index_ != blob_order[next_blob])
next_blob++;
next_blob++;
if (next_blob < blob_order.size()) {
// we're in the same word; simply advance one blob.
BeginWord(blob_order[next_blob]);
at_beginning_of_minor_run_ = false;
return true;
}
level = RIL_WORD; // we've fallen through to the next word.
}
case RIL_WORD: // explicit fall-through.
{
if (it_->word() == NULL) return Next(RIL_BLOCK);
GenericVectorEqEq<int> word_indices;
int this_word_index = LTRWordIndex();
CalculateTextlineOrder(current_paragraph_is_ltr_,
*this,
&word_indices);
int final_real_index = word_indices.size() - 1;
while (final_real_index > 0 && word_indices[final_real_index] < 0)
final_real_index--;
for (int i = 0; i < final_real_index; i++) {
if (word_indices[i] == this_word_index) {
int j = i + 1;
for (; j < final_real_index && word_indices[j] < 0; j++) {
if (word_indices[j] == kMinorRunStart) in_minor_direction_ = true;
if (word_indices[j] == kMinorRunEnd) in_minor_direction_ = false;
}
at_beginning_of_minor_run_ = (word_indices[j - 1] == kMinorRunStart);
// awesome, we move to word_indices[j]
if (BidiDebug(3)) {
tprintf("Next(RIL_WORD): %d -> %d\n",
this_word_index, word_indices[j]);
}
PageIterator::RestartRow();
for (int k = 0; k < word_indices[j]; k++) {
PageIterator::Next(RIL_WORD);
}
MoveToLogicalStartOfWord();
return true;
}
}
if (BidiDebug(3)) {
tprintf("Next(RIL_WORD): %d -> EOL\n", this_word_index);
}
// we're going off the end of the text line.
return Next(RIL_TEXTLINE);
}
}
ASSERT_HOST(false); // shouldn't happen.
return false;
}
bool ResultIterator::IsAtBeginningOf(PageIteratorLevel level) const {
if (it_->block() == NULL) return false; // Already at the end!
if (it_->word() == NULL) return true; // In an image block.
if (level == RIL_SYMBOL) return true; // Always at beginning of a symbol.
bool at_word_start = IsAtFirstSymbolOfWord();
if (level == RIL_WORD) return at_word_start;
ResultIterator line_start(*this);
// move to the first word in the line...
line_start.MoveToLogicalStartOfTextline();
bool at_textline_start = at_word_start && *line_start.it_ == *it_;
if (level == RIL_TEXTLINE) return at_textline_start;
// now we move to the left-most word...
line_start.RestartRow();
bool at_block_start = at_textline_start &&
line_start.it_->block() != line_start.it_->prev_block();
if (level == RIL_BLOCK) return at_block_start;
bool at_para_start = at_block_start ||
(at_textline_start &&
line_start.it_->row()->row->para() !=
line_start.it_->prev_row()->row->para());
if (level == RIL_PARA) return at_para_start;
ASSERT_HOST(false); // shouldn't happen.
return false;
}
/**
* NOTE! This is an exact copy of PageIterator::IsAtFinalElement with the
* change that the variable next is now a ResultIterator instead of a
* PageIterator.
*/
bool ResultIterator::IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const {
if (Empty(element)) return true; // Already at the end!
// The result is true if we step forward by element and find we are
// at the end of the page or at beginning of *all* levels in:
// [level, element).
// When there is more than one level difference between element and level,
// we could for instance move forward one symbol and still be at the first
// word on a line, so we also have to be at the first symbol in a word.
ResultIterator next(*this);
next.Next(element);
if (next.Empty(element)) return true; // Reached the end of the page.
while (element > level) {
element = static_cast<PageIteratorLevel>(element - 1);
if (!next.IsAtBeginningOf(element))
return false;
}
return true;
}
/**
* Returns the null terminated UTF-8 encoded text string for the current
* object at the given level. Use delete [] to free after use.
*/
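// A minimal usage sketch (not part of the original source; it assumes an
// already-initialized tesseract::TessBaseAPI instance named "api" on which
// Recognize() has been called):
//
//   tesseract::ResultIterator* it = api.GetIterator();
//   if (it != NULL) {
//     do {
//       char* word = it->GetUTF8Text(tesseract::RIL_WORD);
//       if (word != NULL) {
//         tprintf("%s\n", word);
//         delete [] word;
//       }
//     } while (it->Next(tesseract::RIL_WORD));
//     delete it;
//   }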
char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
if (it_->word() == NULL) return NULL; // Already at the end!
STRING text;
switch (level) {
case RIL_BLOCK:
{
ResultIterator pp(*this);
do {
pp.AppendUTF8ParagraphText(&text);
} while (pp.Next(RIL_PARA) && pp.it_->block() == it_->block());
}
break;
case RIL_PARA:
AppendUTF8ParagraphText(&text);
break;
case RIL_TEXTLINE:
{
ResultIterator it(*this);
it.MoveToLogicalStartOfTextline();
it.IterateAndAppendUTF8TextlineText(&text);
}
break;
case RIL_WORD:
AppendUTF8WordText(&text);
break;
case RIL_SYMBOL:
{
bool reading_direction_is_ltr =
current_paragraph_is_ltr_ ^ in_minor_direction_;
if (at_beginning_of_minor_run_) {
text += reading_direction_is_ltr ? kLRM : kRLM;
}
text = it_->word()->BestUTF8(blob_index_, !reading_direction_is_ltr);
if (IsAtFinalSymbolOfWord()) AppendSuffixMarks(&text);
}
break;
}
int length = text.length() + 1;
char* result = new char[length];
strncpy(result, text.string(), length);
return result;
}
void ResultIterator::AppendUTF8WordText(STRING *text) const {
if (!it_->word()) return;
ASSERT_HOST(it_->word()->best_choice != NULL);
bool reading_direction_is_ltr =
current_paragraph_is_ltr_ ^ in_minor_direction_;
if (at_beginning_of_minor_run_) {
*text += reading_direction_is_ltr ? kLRM : kRLM;
}
GenericVector<int> blob_order;
CalculateBlobOrder(&blob_order);
for (int i = 0; i < blob_order.size(); i++) {
*text += it_->word()->BestUTF8(blob_order[i], !reading_direction_is_ltr);
}
AppendSuffixMarks(text);
}
void ResultIterator::IterateAndAppendUTF8TextlineText(STRING *text) {
if (Empty(RIL_WORD)) {
Next(RIL_WORD);
return;
}
if (BidiDebug(1)) {
GenericVectorEqEq<int> textline_order;
GenericVector<StrongScriptDirection> dirs;
CalculateTextlineOrder(current_paragraph_is_ltr_,
*this, &dirs, &textline_order);
tprintf("Strong Script dirs [%p/P=%s]: ", it_->row(),
current_paragraph_is_ltr_ ? "ltr" : "rtl");
PrintScriptDirs(dirs);
tprintf("Logical textline order [%p/P=%s]: ", it_->row(),
current_paragraph_is_ltr_ ? "ltr" : "rtl");
for (int i = 0; i < textline_order.size(); i++) {
tprintf("%d ", textline_order[i]);
}
tprintf("\n");
}
int words_appended = 0;
do {
int numSpaces = preserve_interword_spaces_ ? it_->word()->word->space()
: (words_appended > 0);
for (int i = 0; i < numSpaces; ++i) {
*text += " ";
}
AppendUTF8WordText(text);
words_appended++;
} while (Next(RIL_WORD) && !IsAtBeginningOf(RIL_TEXTLINE));
if (BidiDebug(1)) {
tprintf("%d words printed\n", words_appended);
}
*text += line_separator_;
// If we just finished a paragraph, add an extra newline.
if (it_->block() == NULL || IsAtBeginningOf(RIL_PARA))
*text += paragraph_separator_;
}
void ResultIterator::AppendUTF8ParagraphText(STRING *text) const {
ResultIterator it(*this);
it.RestartParagraph();
it.MoveToLogicalStartOfTextline();
if (it.Empty(RIL_WORD)) return;
do {
it.IterateAndAppendUTF8TextlineText(text);
} while (it.it_->block() != NULL && !it.IsAtBeginningOf(RIL_PARA));
}
bool ResultIterator::BidiDebug(int min_level) const {
int debug_level = 1;
IntParam *p = ParamUtils::FindParam<IntParam>(
"bidi_debug", GlobalParams()->int_params,
tesseract_->params()->int_params);
if (p != NULL) debug_level = (inT32)(*p);
return debug_level >= min_level;
}
} // namespace tesseract.

View File

@ -0,0 +1,244 @@
///////////////////////////////////////////////////////////////////////
// File: resultiterator.h
// Description: Iterator for tesseract results that is capable of
// iterating in proper reading order over Bi Directional
// (e.g. mixed Hebrew and English) text.
// Author: David Eger
// Created: Fri May 27 13:58:06 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H__
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H__
#include "platform.h"
#include "ltrresultiterator.h"
template <typename T> class GenericVector;
template <typename T> class GenericVectorEqEq;
class BLOB_CHOICE_IT;
class WERD_RES;
class STRING;
namespace tesseract {
class Tesseract;
class TESS_API ResultIterator : public LTRResultIterator {
public:
static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
/**
* ResultIterator is copy constructible!
* The default copy constructor works just fine for us.
*/
virtual ~ResultIterator() {}
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin
* an iteration.
*/
virtual void Begin();
/**
* Moves to the start of the next object at the given level in the
* page hierarchy in the appropriate reading order and returns false if
* the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
virtual bool Next(PageIteratorLevel level);
/**
* IsAtBeginningOf() returns whether we're at the logical beginning of the
* given level. (as opposed to ResultIterator's left-to-right top-to-bottom
* order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
* For a full description, see pageiterator.h
*/
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
/**
* Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
* point at the last word in a paragraph. See PageIterator for full comment.
*/
virtual bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const;
// ============= Accessing data ==============.
/**
* Returns the null terminated UTF-8 encoded text string for the current
* object at the given level. Use delete [] to free after use.
*/
virtual char* GetUTF8Text(PageIteratorLevel level) const;
/**
* Return whether the current paragraph's dominant reading direction
* is left-to-right (as opposed to right-to-left).
*/
bool ParagraphIsLtr() const;
// ============= Exposed only for testing =============.
/**
* Yields the reading order as a sequence of indices and (optional)
* meta-marks for a set of words (given left-to-right).
* The meta marks are passed as negative values:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The next indexed word contains both left-to-right and
* right-to-left characters and was treated as neutral.
*
* For example, suppose we have five words in a text line,
* indexed [0,1,2,3,4] from the leftmost side of the text line.
* The following are all believable reading_orders:
*
* Left-to-Right (in ltr paragraph):
* { 0, 1, 2, 3, 4 }
* Left-to-Right (in rtl paragraph):
* { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
* Right-to-Left (in rtl paragraph):
* { 4, 3, 2, 1, 0 }
* Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
* { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
*/
static void CalculateTextlineOrder(
bool paragraph_is_ltr,
const GenericVector<StrongScriptDirection> &word_dirs,
GenericVectorEqEq<int> *reading_order);
static const int kMinorRunStart;
static const int kMinorRunEnd;
static const int kComplexWord;
protected:
/**
* We presume the data associated with the given iterator will outlive us.
* NB: This is private because it does something that is non-obvious:
* it resets to the beginning of the paragraph instead of staying wherever
* resit might have pointed.
*/
TESS_LOCAL explicit ResultIterator(const LTRResultIterator &resit);
private:
/**
* Calculates the current paragraph's dominant writing direction.
* Typically, members should use current_paragraph_is_ltr_ instead.
*/
bool CurrentParagraphIsLtr() const;
/**
* Returns word indices as measured from resit->RestartRow() = index 0
* for the reading order of words within a textline given an iterator
* into the middle of the text line.
* In addition to non-negative word indices, the following negative values
* may be inserted:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The previous word contains both left-to-right and
* right-to-left characters and was treated as neutral.
*/
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
GenericVectorEqEq<int> *indices) const;
/** Same as above, but the caller's ssd gets filled in if ssd != NULL. */
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
GenericVector<StrongScriptDirection> *ssd,
GenericVectorEqEq<int> *indices) const;
/**
* What is the index of the current word in a strict left-to-right reading
* of the row?
*/
int LTRWordIndex() const;
/**
* Given an iterator pointing at a word, returns the logical reading order
* of blob indices for the word.
*/
void CalculateBlobOrder(GenericVector<int> *blob_indices) const;
/** Precondition: current_paragraph_is_ltr_ is set. */
void MoveToLogicalStartOfTextline();
/**
* Precondition: current_paragraph_is_ltr_ and in_minor_direction_
* are set.
*/
void MoveToLogicalStartOfWord();
/** Are we pointing at the final (reading order) symbol of the word? */
bool IsAtFinalSymbolOfWord() const;
/** Are we pointing at the first (reading order) symbol of the word? */
bool IsAtFirstSymbolOfWord() const;
/**
* Append any extra marks that should be appended to this word when printed.
* Mostly, these are Unicode BiDi control characters.
*/
void AppendSuffixMarks(STRING *text) const;
/** Appends the current word in reading order to the given buffer.*/
void AppendUTF8WordText(STRING *text) const;
/**
* Appends the text of the current text line, *assuming this iterator is
* positioned at the beginning of the text line*. This function
* updates the iterator to point to the first position past the text line.
* Each textline is terminated in a single newline character.
* If the textline ends a paragraph, it gets a second terminal newline.
*/
void IterateAndAppendUTF8TextlineText(STRING *text);
/**
* Appends the text of the current paragraph in reading order
* to the given buffer.
* Each textline is terminated in a single newline character, and the
* paragraph gets an extra newline at the end.
*/
void AppendUTF8ParagraphText(STRING *text) const;
/** Returns whether the bidi_debug flag is set to at least min_level. */
bool BidiDebug(int min_level) const;
bool current_paragraph_is_ltr_;
/**
* Is the currently pointed-at character at the beginning of
* a minor-direction run?
*/
bool at_beginning_of_minor_run_;
/** Is the currently pointed-at character in a minor-direction sequence? */
bool in_minor_direction_;
/**
* Should detected inter-word spaces be preserved, or "compressed" to a single
* space character (default behavior).
*/
bool preserve_interword_spaces_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H__

View File

@ -0,0 +1,619 @@
/******************************************************************
* File: superscript.cpp
* Description: Correction pass to fix superscripts and subscripts.
* Author: David Eger
* Created: Mon Mar 12 14:05:00 PDT 2012
*
* (C) Copyright 2012, Google, Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "normalis.h"
#include "tesseractclass.h"
static int LeadingUnicharsToChopped(WERD_RES *word, int num_unichars) {
int num_chopped = 0;
for (int i = 0; i < num_unichars; i++)
num_chopped += word->best_state[i];
return num_chopped;
}
static int TrailingUnicharsToChopped(WERD_RES *word, int num_unichars) {
int num_chopped = 0;
for (int i = 0; i < num_unichars; i++)
num_chopped += word->best_state[word->best_state.size() - 1 - i];
return num_chopped;
}
namespace tesseract {
/**
* Given a recognized blob, see if a contiguous collection of sub-pieces
* (chopped blobs) starting at its left might qualify as being a subscript
* or superscript letter based only on y position. Also do this for the
* right side.
*/
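// Hypothetical example: if a rebuilt blob consists of three chopped pieces
// classified by y position as {normal, superscript, superscript}, the loop
// below reports *num_leading_outliers == 0, *num_trailing_outliers == 2 and
// *trailing_pos == SP_SUPERSCRIPT.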
void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index,
int super_y_bottom, int sub_y_top,
ScriptPos *leading_pos, int *num_leading_outliers,
ScriptPos *trailing_pos, int *num_trailing_outliers) {
ScriptPos sp_unused1, sp_unused2;
int unused1, unused2;
if (!leading_pos) leading_pos = &sp_unused1;
if (!num_leading_outliers) num_leading_outliers = &unused1;
if (!trailing_pos) trailing_pos = &sp_unused2;
if (!num_trailing_outliers) num_trailing_outliers = &unused2;
*num_leading_outliers = *num_trailing_outliers = 0;
*leading_pos = *trailing_pos = SP_NORMAL;
int chopped_start = LeadingUnicharsToChopped(word, rebuilt_blob_index);
int num_chopped_pieces = word->best_state[rebuilt_blob_index];
ScriptPos last_pos = SP_NORMAL;
int trailing_outliers = 0;
for (int i = 0; i < num_chopped_pieces; i++) {
TBOX box = word->chopped_word->blobs[chopped_start + i]->bounding_box();
ScriptPos pos = SP_NORMAL;
if (box.bottom() >= super_y_bottom) {
pos = SP_SUPERSCRIPT;
}
else if (box.top() <= sub_y_top) {
pos = SP_SUBSCRIPT;
}
if (pos == SP_NORMAL) {
if (trailing_outliers == i) {
*num_leading_outliers = trailing_outliers;
*leading_pos = last_pos;
}
trailing_outliers = 0;
}
else {
if (pos == last_pos) {
trailing_outliers++;
}
else {
trailing_outliers = 1;
}
}
last_pos = pos;
}
*num_trailing_outliers = trailing_outliers;
*trailing_pos = last_pos;
}
/**
* Attempt to split off any high (or low) bits at the ends of the word with poor
* certainty and recognize them separately. If the certainty gets much better
* and other sanity checks pass, accept.
*
* This superscript fix is meant to be called in the second pass of recognition
* when we have tried once and already have a preliminary answer for word.
*
* @return Whether we modified the given word.
*/
bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) {
if (word->tess_failed || word->word->flag(W_REP_CHAR) ||
!word->best_choice) {
return false;
}
int num_leading, num_trailing;
ScriptPos sp_leading, sp_trailing;
float leading_certainty, trailing_certainty;
float avg_certainty, unlikely_threshold;
// Calculate the number of whole suspicious characters at the edges.
GetSubAndSuperscriptCandidates(
word, &num_leading, &sp_leading, &leading_certainty,
&num_trailing, &sp_trailing, &trailing_certainty,
&avg_certainty, &unlikely_threshold);
const char *leading_pos = sp_leading == SP_SUBSCRIPT ? "sub" : "super";
const char *trailing_pos = sp_trailing == SP_SUBSCRIPT ? "sub" : "super";
int num_blobs = word->best_choice->length();
// Calculate the remainder (partial characters) at the edges.
// This accounts for us having classified the best version of
// a word as [speaker?'] when it was instead [speaker.^{21}]
// (that is we accidentally thought the 2 was attached to the period).
int num_remainder_leading = 0, num_remainder_trailing = 0;
if (num_leading + num_trailing < num_blobs && unlikely_threshold < 0.0) {
int super_y_bottom =
kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom;
int sub_y_top =
kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top;
int last_word_char = num_blobs - 1 - num_trailing;
float last_char_certainty = word->best_choice->certainty(last_word_char);
if (word->best_choice->unichar_id(last_word_char) != 0 &&
last_char_certainty <= unlikely_threshold) {
ScriptPos rpos;
YOutlierPieces(word, last_word_char, super_y_bottom, sub_y_top,
NULL, NULL, &rpos, &num_remainder_trailing);
if (num_trailing > 0 && rpos != sp_trailing) num_remainder_trailing = 0;
if (num_remainder_trailing > 0 &&
last_char_certainty < trailing_certainty) {
trailing_certainty = last_char_certainty;
}
}
bool another_blob_available = (num_remainder_trailing == 0) ||
num_leading + num_trailing + 1 < num_blobs;
    float first_char_certainty = word->best_choice->certainty(num_leading);
if (another_blob_available &&
word->best_choice->unichar_id(num_leading) != 0 &&
first_char_certainty <= unlikely_threshold) {
ScriptPos lpos;
YOutlierPieces(word, num_leading, super_y_bottom, sub_y_top,
&lpos, &num_remainder_leading, NULL, NULL);
if (num_leading > 0 && lpos != sp_leading) num_remainder_leading = 0;
if (num_remainder_leading > 0 &&
first_char_certainty < leading_certainty) {
leading_certainty = first_char_certainty;
}
}
}
// If nothing to do, bail now.
if (num_leading + num_trailing +
num_remainder_leading + num_remainder_trailing == 0) {
return false;
}
if (superscript_debug >= 1) {
tprintf("Candidate for superscript detection: %s (",
word->best_choice->unichar_string().string());
if (num_leading || num_remainder_leading) {
tprintf("%d.%d %s-leading ", num_leading, num_remainder_leading,
leading_pos);
}
if (num_trailing || num_remainder_trailing) {
tprintf("%d.%d %s-trailing ", num_trailing, num_remainder_trailing,
trailing_pos);
}
tprintf(")\n");
}
if (superscript_debug >= 3) {
word->best_choice->print();
}
if (superscript_debug >= 2) {
tprintf(" Certainties -- Average: %.2f Unlikely thresh: %.2f ",
avg_certainty, unlikely_threshold);
if (num_leading)
tprintf("Orig. leading (min): %.2f ", leading_certainty);
if (num_trailing)
tprintf("Orig. trailing (min): %.2f ", trailing_certainty);
tprintf("\n");
}
// We've now calculated the number of rebuilt blobs we want to carve off.
// However, split_word() works from TBLOBs in chopped_word, so we need to
// convert to those.
int num_chopped_leading =
LeadingUnicharsToChopped(word, num_leading) + num_remainder_leading;
int num_chopped_trailing =
TrailingUnicharsToChopped(word, num_trailing) + num_remainder_trailing;
int retry_leading = 0;
int retry_trailing = 0;
bool is_good = false;
WERD_RES *revised = TrySuperscriptSplits(
num_chopped_leading, leading_certainty, sp_leading,
num_chopped_trailing, trailing_certainty, sp_trailing,
word, &is_good, &retry_leading, &retry_trailing);
if (is_good) {
word->ConsumeWordResults(revised);
}
else if (retry_leading || retry_trailing) {
int retry_chopped_leading =
LeadingUnicharsToChopped(revised, retry_leading);
int retry_chopped_trailing =
TrailingUnicharsToChopped(revised, retry_trailing);
WERD_RES *revised2 = TrySuperscriptSplits(
retry_chopped_leading, leading_certainty, sp_leading,
retry_chopped_trailing, trailing_certainty, sp_trailing,
revised, &is_good, &retry_leading, &retry_trailing);
if (is_good) {
word->ConsumeWordResults(revised2);
}
delete revised2;
}
delete revised;
return is_good;
}
/**
* Determine how many characters (rebuilt blobs) on each end of a given word
* might plausibly be superscripts so SubAndSuperscriptFix can try to
* re-recognize them. Even if we find no whole blobs at either end,
* we will set *unlikely_threshold to a certainty that might be used to
* select "bad enough" outlier characters. If *unlikely_threshold is set to 0,
* though, there's really no hope.
*
* @param[in] word The word to examine.
* @param[out] num_rebuilt_leading the number of rebuilt blobs at the start
* of the word which are all up or down and
* seem badly classified.
* @param[out] leading_pos "super" or "sub" (for debugging)
* @param[out] leading_certainty the worst certainty in the leading blobs.
* @param[out] num_rebuilt_trailing the number of rebuilt blobs at the end
* of the word which are all up or down and
* seem badly classified.
* @param[out] trailing_pos "super" or "sub" (for debugging)
* @param[out] trailing_certainty the worst certainty in the trailing blobs.
* @param[out] avg_certainty the average certainty of "normal" blobs in
* the word.
* @param[out] unlikely_threshold the threshold (on certainty) we used to
* select "bad enough" outlier characters.
*/
void Tesseract::GetSubAndSuperscriptCandidates(const WERD_RES *word,
int *num_rebuilt_leading,
ScriptPos *leading_pos,
float *leading_certainty,
int *num_rebuilt_trailing,
ScriptPos *trailing_pos,
float *trailing_certainty,
float *avg_certainty,
float *unlikely_threshold) {
*avg_certainty = *unlikely_threshold = 0.0f;
*num_rebuilt_leading = *num_rebuilt_trailing = 0;
*leading_certainty = *trailing_certainty = 0.0f;
int super_y_bottom =
kBlnBaselineOffset + kBlnXHeight * superscript_min_y_bottom;
int sub_y_top =
kBlnBaselineOffset + kBlnXHeight * subscript_max_y_top;
// Step one: Get an average certainty for "normally placed" characters.
// Counts here are of blobs in the rebuild_word / unichars in best_choice.
*leading_pos = *trailing_pos = SP_NORMAL;
int leading_outliers = 0;
int trailing_outliers = 0;
int num_normal = 0;
float normal_certainty_total = 0.0f;
float worst_normal_certainty = 0.0f;
ScriptPos last_pos = SP_NORMAL;
int num_blobs = word->rebuild_word->NumBlobs();
for (int b = 0; b < num_blobs; ++b) {
TBOX box = word->rebuild_word->blobs[b]->bounding_box();
ScriptPos pos = SP_NORMAL;
if (box.bottom() >= super_y_bottom) {
pos = SP_SUPERSCRIPT;
}
else if (box.top() <= sub_y_top) {
pos = SP_SUBSCRIPT;
}
if (pos == SP_NORMAL) {
if (word->best_choice->unichar_id(b) != 0) {
float char_certainty = word->best_choice->certainty(b);
if (char_certainty < worst_normal_certainty) {
worst_normal_certainty = char_certainty;
}
num_normal++;
normal_certainty_total += char_certainty;
}
if (trailing_outliers == b) {
leading_outliers = trailing_outliers;
*leading_pos = last_pos;
}
trailing_outliers = 0;
}
else {
if (last_pos == pos) {
trailing_outliers++;
}
else {
trailing_outliers = 1;
}
}
last_pos = pos;
}
*trailing_pos = last_pos;
if (num_normal >= 3) { // throw out the worst as an outlier.
num_normal--;
normal_certainty_total -= worst_normal_certainty;
}
if (num_normal > 0) {
*avg_certainty = normal_certainty_total / num_normal;
*unlikely_threshold = superscript_worse_certainty * (*avg_certainty);
}
if (num_normal == 0 ||
(leading_outliers == 0 && trailing_outliers == 0)) {
return;
}
// Step two: Try to split off bits of the word that are both outliers
// and have much lower certainty than average
// Calculate num_leading and leading_certainty.
for (*leading_certainty = 0.0f, *num_rebuilt_leading = 0;
*num_rebuilt_leading < leading_outliers;
(*num_rebuilt_leading)++) {
float char_certainty = word->best_choice->certainty(*num_rebuilt_leading);
if (char_certainty > *unlikely_threshold) {
break;
}
if (char_certainty < *leading_certainty) {
*leading_certainty = char_certainty;
}
}
// Calculate num_trailing and trailing_certainty.
for (*trailing_certainty = 0.0f, *num_rebuilt_trailing = 0;
*num_rebuilt_trailing < trailing_outliers;
(*num_rebuilt_trailing)++) {
int blob_idx = num_blobs - 1 - *num_rebuilt_trailing;
float char_certainty = word->best_choice->certainty(blob_idx);
if (char_certainty > *unlikely_threshold) {
break;
}
if (char_certainty < *trailing_certainty) {
*trailing_certainty = char_certainty;
}
}
}
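// A minimal numeric sketch of the threshold computed above, with hypothetical
// certainties: three "normal" blobs at -2.0, -3.0 and -9.0 drop the worst
// (-9.0) as an outlier, giving *avg_certainty == -2.5; assuming
// superscript_worse_certainty == 2.0, *unlikely_threshold becomes -5.0, so
// only edge blobs with certainty <= -5.0 are counted as candidates.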
/**
* Try splitting off the given number of (chopped) blobs from the front and
* back of the given word and recognizing the pieces.
*
* @param[in] num_chopped_leading how many chopped blobs from the left
* end of the word to chop off and try recognizing as a
* superscript (or subscript)
* @param[in] leading_certainty the (minimum) certainty had by the
* characters in the original leading section.
* @param[in] leading_pos "super" or "sub" (for debugging)
* @param[in] num_chopped_trailing how many chopped blobs from the right
* end of the word to chop off and try recognizing as a
* superscript (or subscript)
* @param[in] trailing_certainty the (minimum) certainty had by the
* characters in the original trailing section.
* @param[in] trailing_pos "super" or "sub" (for debugging)
* @param[in] word the word to try to chop up.
* @param[out] is_good do we believe our result?
* @param[out] retry_rebuild_leading, retry_rebuild_trailing
* If non-zero, and !is_good, then the caller may have luck trying
* to split the returned word with this number of (rebuilt) leading
* and trailing blobs / unichars.
* @return A word which is the result of re-recognizing as asked.
*/
WERD_RES *Tesseract::TrySuperscriptSplits(
int num_chopped_leading, float leading_certainty, ScriptPos leading_pos,
int num_chopped_trailing, float trailing_certainty,
ScriptPos trailing_pos,
WERD_RES *word,
bool *is_good,
int *retry_rebuild_leading, int *retry_rebuild_trailing) {
int num_chopped = word->chopped_word->NumBlobs();
*retry_rebuild_leading = *retry_rebuild_trailing = 0;
// Chop apart the word into up to three pieces.
BlamerBundle *bb0 = NULL;
BlamerBundle *bb1 = NULL;
WERD_RES *prefix = NULL;
WERD_RES *core = NULL;
WERD_RES *suffix = NULL;
if (num_chopped_leading > 0) {
prefix = new WERD_RES(*word);
split_word(prefix, num_chopped_leading, &core, &bb0);
}
else {
core = new WERD_RES(*word);
}
if (num_chopped_trailing > 0) {
int split_pt = num_chopped - num_chopped_trailing - num_chopped_leading;
split_word(core, split_pt, &suffix, &bb1);
}
// Recognize the pieces in turn.
int saved_cp_multiplier = classify_class_pruner_multiplier;
int saved_im_multiplier = classify_integer_matcher_multiplier;
if (prefix) {
// Turn off Tesseract's y-position penalties for the leading superscript.
classify_class_pruner_multiplier.set_value(0);
classify_integer_matcher_multiplier.set_value(0);
// Adjust our expectations about the baseline for this prefix.
if (superscript_debug >= 3) {
tprintf(" recognizing first %d chopped blobs\n", num_chopped_leading);
}
recog_word_recursive(prefix);
if (superscript_debug >= 2) {
tprintf(" The leading bits look like %s %s\n",
ScriptPosToString(leading_pos),
prefix->best_choice->unichar_string().string());
}
// Restore the normal y-position penalties.
classify_class_pruner_multiplier.set_value(saved_cp_multiplier);
classify_integer_matcher_multiplier.set_value(saved_im_multiplier);
}
if (superscript_debug >= 3) {
tprintf(" recognizing middle %d chopped blobs\n",
num_chopped - num_chopped_leading - num_chopped_trailing);
}
if (suffix) {
// Turn off Tesseract's y-position penalties for the trailing superscript.
classify_class_pruner_multiplier.set_value(0);
classify_integer_matcher_multiplier.set_value(0);
if (superscript_debug >= 3) {
tprintf(" recognizing last %d chopped blobs\n", num_chopped_trailing);
}
recog_word_recursive(suffix);
if (superscript_debug >= 2) {
tprintf(" The trailing bits look like %s %s\n",
ScriptPosToString(trailing_pos),
suffix->best_choice->unichar_string().string());
}
// Restore the normal y-position penalties.
classify_class_pruner_multiplier.set_value(saved_cp_multiplier);
classify_integer_matcher_multiplier.set_value(saved_im_multiplier);
}
// Evaluate whether we think the results are believably better
// than what we already had.
bool good_prefix = !prefix || BelievableSuperscript(
superscript_debug >= 1, *prefix,
superscript_bettered_certainty * leading_certainty,
retry_rebuild_leading, NULL);
bool good_suffix = !suffix || BelievableSuperscript(
superscript_debug >= 1, *suffix,
superscript_bettered_certainty * trailing_certainty,
NULL, retry_rebuild_trailing);
*is_good = good_prefix && good_suffix;
if (!*is_good && !*retry_rebuild_leading && !*retry_rebuild_trailing) {
// None of it is any good. Quit now.
delete core;
delete prefix;
delete suffix;
return NULL;
}
recog_word_recursive(core);
// Now paste the results together into core.
if (suffix) {
suffix->SetAllScriptPositions(trailing_pos);
join_words(core, suffix, bb1);
}
if (prefix) {
prefix->SetAllScriptPositions(leading_pos);
join_words(prefix, core, bb0);
core = prefix;
prefix = NULL;
}
if (superscript_debug >= 1) {
tprintf("%s superscript fix: %s\n", *is_good ? "ACCEPT" : "REJECT",
core->best_choice->unichar_string().string());
}
return core;
}
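// Sketch of the split performed above, with hypothetical counts: for a
// chopped_word of N blobs, num_chopped_leading == 2 and
// num_chopped_trailing == 1,
//   split_word(prefix, 2, &core, &bb0);      // prefix keeps blobs [0, 1]
//   split_word(core, N - 3, &suffix, &bb1);  // suffix keeps the last blob
// prefix and suffix are recognized with the y-position penalties disabled,
// core is recognized normally, and join_words() reassembles the result.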
/**
* Return whether this is believable superscript or subscript text.
*
* We insist that:
* + there are no punctuation marks.
* + there are no italics.
* + no normal-sized character is smaller than superscript_scaledown_ratio
* of what it ought to be, and
* + each character is at least as certain as certainty_threshold.
*
* @param[in] debug If true, spew debug output
* @param[in] word The word whose best_choice we're evaluating
* @param[in] certainty_threshold If any of the characters have less
* certainty than this, reject.
* @param[out] left_ok How many left-side characters were ok?
* @param[out] right_ok How many right-side characters were ok?
* @return Whether the complete best choice is believable as a superscript.
*/
bool Tesseract::BelievableSuperscript(bool debug,
const WERD_RES &word,
float certainty_threshold,
int *left_ok,
int *right_ok) const {
int initial_ok_run_count = 0;
int ok_run_count = 0;
float worst_certainty = 0.0f;
const WERD_CHOICE &wc = *word.best_choice;
const UnicityTable<FontInfo>& fontinfo_table = get_fontinfo_table();
for (int i = 0; i < wc.length(); i++) {
TBLOB *blob = word.rebuild_word->blobs[i];
UNICHAR_ID unichar_id = wc.unichar_id(i);
float char_certainty = wc.certainty(i);
bool bad_certainty = char_certainty < certainty_threshold;
bool is_punc = wc.unicharset()->get_ispunctuation(unichar_id);
bool is_italic = word.fontinfo && word.fontinfo->is_italic();
BLOB_CHOICE *choice = word.GetBlobChoice(i);
if (choice && fontinfo_table.size() > 0) {
// Get better information from the specific choice, if available.
int font_id1 = choice->fontinfo_id();
bool font1_is_italic = font_id1 >= 0
? fontinfo_table.get(font_id1).is_italic() : false;
int font_id2 = choice->fontinfo_id2();
is_italic = font1_is_italic &&
(font_id2 < 0 || fontinfo_table.get(font_id2).is_italic());
}
float height_fraction = 1.0f;
float char_height = blob->bounding_box().height();
float normal_height = char_height;
if (wc.unicharset()->top_bottom_useful()) {
int min_bot, max_bot, min_top, max_top;
wc.unicharset()->get_top_bottom(unichar_id,
&min_bot, &max_bot,
&min_top, &max_top);
float hi_height = max_top - max_bot;
float lo_height = min_top - min_bot;
normal_height = (hi_height + lo_height) / 2;
if (normal_height >= kBlnXHeight) {
// Only ding characters that we have decent information for because
// they're supposed to be normal sized, not tiny specks or dashes.
height_fraction = char_height / normal_height;
}
}
bool bad_height = height_fraction < superscript_scaledown_ratio;
if (debug) {
if (is_italic) {
tprintf(" Rejecting: superscript is italic.\n");
}
if (is_punc) {
tprintf(" Rejecting: punctuation present.\n");
}
const char *char_str = wc.unicharset()->id_to_unichar(unichar_id);
if (bad_certainty) {
tprintf(" Rejecting: don't believe character %s with certainty %.2f "
"which is less than threshold %.2f\n", char_str,
char_certainty, certainty_threshold);
}
if (bad_height) {
tprintf(" Rejecting: character %s seems too small @ %.2f versus "
"expected %.2f\n", char_str, char_height, normal_height);
}
}
if (bad_certainty || bad_height || is_punc || is_italic) {
if (ok_run_count == i) {
initial_ok_run_count = ok_run_count;
}
ok_run_count = 0;
}
else {
ok_run_count++;
}
if (char_certainty < worst_certainty) {
worst_certainty = char_certainty;
}
}
bool all_ok = ok_run_count == wc.length();
if (all_ok && debug) {
tprintf(" Accept: worst revised certainty is %.2f\n", worst_certainty);
}
if (!all_ok) {
if (left_ok) *left_ok = initial_ok_run_count;
if (right_ok) *right_ok = ok_run_count;
}
return all_ok;
}
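// A minimal numeric example of the size test above, with hypothetical values:
// if the unicharset expects a normal blob height of 160 bln units and the
// recognized blob is only 70 units tall, height_fraction == 0.4375; with a
// superscript_scaledown_ratio of, say, 0.5, bad_height is true and the
// superscript hypothesis is rejected for that character.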
} // namespace tesseract

View File

@ -0,0 +1,82 @@
/**********************************************************************
* File: tessbox.cpp (Formerly tessbox.c)
* Description: Black boxed Tess for developing a resaljet.
* Author: Ray Smith
* Created: Thu Apr 23 11:03:36 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifdef _MSC_VER
#pragma warning(disable:4244) // Conversion warnings
#endif
#include "mfoutline.h"
#include "tessbox.h"
#include "tesseractclass.h"
#define EXTERN
/**
* @name tess_segment_pass_n
*
* Segment a word using the pass_n conditions of the tess segmenter.
* @param pass_n pass number
* @param word word to do
*/
namespace tesseract {
void Tesseract::tess_segment_pass_n(int pass_n, WERD_RES *word) {
int saved_enable_assoc = 0;
int saved_chop_enable = 0;
if (word->word->flag(W_DONT_CHOP)) {
saved_enable_assoc = wordrec_enable_assoc;
saved_chop_enable = chop_enable;
wordrec_enable_assoc.set_value(0);
chop_enable.set_value(0);
}
if (pass_n == 1)
set_pass1();
else
set_pass2();
recog_word(word);
if (word->best_choice == NULL)
word->SetupFake(*word->uch_set);
if (word->word->flag(W_DONT_CHOP)) {
wordrec_enable_assoc.set_value(saved_enable_assoc);
chop_enable.set_value(saved_chop_enable);
}
}
/**
* @name tess_acceptable_word
*
* @return true if the word is regarded as "good enough".
* @param word_choice after context
* @param raw_choice before context
*/
bool Tesseract::tess_acceptable_word(WERD_RES* word) {
return getDict().AcceptableResult(word);
}
/**
* @name tess_add_doc_word
*
* Add the given word to the document dictionary
*/
void Tesseract::tess_add_doc_word(WERD_CHOICE *word_choice) {
getDict().add_document_word(*word_choice);
}
} // namespace tesseract

View File

@ -0,0 +1,28 @@
/**********************************************************************
* File: tessbox.h (Formerly tessbox.h)
* Description: Black boxed Tess for developing a resaljet.
* Author: Ray Smith
* Created: Thu Apr 23 11:03:36 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSBOX_H
#define TESSBOX_H
#include "ratngs.h"
#include "tesseractclass.h"
// TODO(ocr-team): Delete this along with other empty header files.
#endif

View File

@ -0,0 +1,501 @@
/**********************************************************************
* File: tessedit.cpp (Formerly tessedit.c)
* Description: (Previously) Main program for merge of tess and editor.
* Now just code to load the language model and various
* engine-specific data files.
* Author: Ray Smith
* Created: Tue Jan 07 15:21:46 GMT 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#include "stderr.h"
#include "basedir.h"
#include "tessvars.h"
#include "control.h"
#include "reject.h"
#include "pageres.h"
#include "nwmain.h"
#include "pgedit.h"
#include "tprintf.h"
#include "tessedit.h"
#include "stopper.h"
#include "intmatcher.h"
#include "chop.h"
#include "efio.h"
#include "danerror.h"
#include "globals.h"
#include "tesseractclass.h"
#include "params.h"
#define VARDIR "configs/" /*variables files */
// config under api
#define API_CONFIG "configs/api_config"
ETEXT_DESC *global_monitor = NULL; // progress monitor
namespace tesseract {
// Read a "config" file containing a set of variable, value pairs.
// Searches the standard places: tessdata/configs, tessdata/tessconfigs
// and also accepts a relative or absolute path name.
void Tesseract::read_config_file(const char *filename,
SetParamConstraint constraint) {
STRING path = datadir;
path += "configs/";
path += filename;
FILE* fp;
if ((fp = fopen(path.string(), "rb")) != NULL) {
fclose(fp);
}
else {
path = datadir;
path += "tessconfigs/";
path += filename;
if ((fp = fopen(path.string(), "rb")) != NULL) {
fclose(fp);
}
else {
path = filename;
}
}
ParamUtils::ReadParamsFile(path.string(), constraint, this->params());
}
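// A config file is plain text with one "name value" pair per line. For
// example, a hypothetical tessdata/configs/quiet containing
//   debug_file  /dev/null
// would be found by read_config_file("quiet", SET_PARAM_CONSTRAINT_NONE) via
// the first search path (tessdata/configs/), before tessconfigs/ or a literal
// relative/absolute path is tried.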
// Returns false if a unicharset file for the specified language was not found
// or was invalid.
// This function initializes TessdataManager. After TessdataManager is
// no longer needed, TessdataManager::End() should be called.
//
// This function sets tessedit_oem_mode to the given OcrEngineMode oem, unless
// it is OEM_DEFAULT, in which case the value of the variable will be obtained
// from the language-specific config file (stored in [lang].traineddata), from
// the config files specified on the command line or left as the default
// OEM_TESSERACT_ONLY if none of the configs specify this variable.
bool Tesseract::init_tesseract_lang_data(
const char *arg0, const char *textbase, const char *language,
OcrEngineMode oem, char **configs, int configs_size,
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
bool set_only_non_debug_params) {
// Set the basename, compute the data directory.
main_setup(arg0, textbase);
// Set the language data path prefix
lang = language != NULL ? language : "eng";
language_data_path_prefix = datadir;
language_data_path_prefix += lang;
language_data_path_prefix += ".";
// Initialize TessdataManager.
//STRING tessdata_path = language_data_path_prefix + kTrainedDataSuffix;
STRING tessdata_path = arg0;
if (!tessdata_manager.Init(tessdata_path.string(),
tessdata_manager_debug_level)) {
return false;
}
// If a language specific config file (lang.config) exists, load it in.
if (tessdata_manager.SeekToStart(TESSDATA_LANG_CONFIG)) {
ParamUtils::ReadParamsFromFp(
tessdata_manager.GetDataFilePtr(),
tessdata_manager.GetEndOffset(TESSDATA_LANG_CONFIG),
SET_PARAM_CONSTRAINT_NONE, this->params());
if (tessdata_manager_debug_level) {
tprintf("Loaded language config file\n");
}
}
SetParamConstraint set_params_constraint = set_only_non_debug_params ?
SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY : SET_PARAM_CONSTRAINT_NONE;
// Load tesseract variables from config files. This is done after loading
// language-specific variables from [lang].traineddata file, so that custom
// config files can override values in [lang].traineddata file.
for (int i = 0; i < configs_size; ++i) {
read_config_file(configs[i], set_params_constraint);
}
// Set params specified in vars_vec (done after setting params from config
// files, so that params in vars_vec can override those from files).
if (vars_vec != NULL && vars_values != NULL) {
for (int i = 0; i < vars_vec->size(); ++i) {
if (!ParamUtils::SetParam((*vars_vec)[i].string(),
(*vars_values)[i].string(),
set_params_constraint, this->params())) {
tprintf("Error setting param %s\n", (*vars_vec)[i].string());
exit(1);
}
}
}
if (((STRING &)tessedit_write_params_to_file).length() > 0) {
FILE *params_file = fopen(tessedit_write_params_to_file.string(), "wb");
if (params_file != NULL) {
ParamUtils::PrintParams(params_file, this->params());
fclose(params_file);
if (tessdata_manager_debug_level > 0) {
tprintf("Wrote parameters to %s\n",
tessedit_write_params_to_file.string());
}
}
else {
tprintf("Failed to open %s for writing params.\n",
tessedit_write_params_to_file.string());
}
}
// Determine which ocr engine(s) should be loaded and used for recognition.
if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem);
if (tessdata_manager_debug_level) {
tprintf("Loading Tesseract/Cube with tessedit_ocr_engine_mode %d\n",
static_cast<int>(tessedit_ocr_engine_mode));
}
// If we are only loading the config file (and so not planning on doing any
// recognition) then there's nothing else to do here.
if (tessedit_init_config_only) {
if (tessdata_manager_debug_level) {
tprintf("Returning after loading config file\n");
}
return true;
}
// Load the unicharset
if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) ||
!unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) {
return false;
}
if (unicharset.size() > MAX_NUM_CLASSES) {
tprintf("Error: Size of unicharset is greater than MAX_NUM_CLASSES\n");
return false;
}
if (tessdata_manager_debug_level) tprintf("Loaded unicharset\n");
right_to_left_ = unicharset.major_right_to_left();
// Setup initial unichar ambigs table and read universal ambigs.
UNICHARSET encoder_unicharset;
encoder_unicharset.CopyFrom(unicharset);
unichar_ambigs.InitUnicharAmbigs(unicharset, use_ambigs_for_adaption);
unichar_ambigs.LoadUniversal(encoder_unicharset, &unicharset);
if (!tessedit_ambigs_training &&
tessdata_manager.SeekToStart(TESSDATA_AMBIGS)) {
TFile ambigs_file;
ambigs_file.Open(tessdata_manager.GetDataFilePtr(),
tessdata_manager.GetEndOffset(TESSDATA_AMBIGS) + 1);
unichar_ambigs.LoadUnicharAmbigs(
encoder_unicharset,
&ambigs_file,
ambigs_debug_level, use_ambigs_for_adaption, &unicharset);
if (tessdata_manager_debug_level) tprintf("Loaded ambigs\n");
}
// The various OcrEngineMode settings (see publictypes.h) determine which
// engine-specific data files need to be loaded. Currently everything needs
// the base tesseract data, which supplies other useful information, but
// alternative engines, such as cube and LSTM are optional.
#ifndef NO_CUBE_BUILD
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
ASSERT_HOST(init_cube_objects(false, &tessdata_manager));
if (tessdata_manager_debug_level)
tprintf("Loaded Cube w/out combiner\n");
}
else if (tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED) {
ASSERT_HOST(init_cube_objects(true, &tessdata_manager));
if (tessdata_manager_debug_level)
tprintf("Loaded Cube with combiner\n");
}
#endif
// Init ParamsModel.
// Load pass1 and pass2 weights (for now these two sets are the same, but in
// the future separate sets of weights can be generated).
for (int p = ParamsModel::PTRAIN_PASS1;
p < ParamsModel::PTRAIN_NUM_PASSES; ++p) {
language_model_->getParamsModel().SetPass(
static_cast<ParamsModel::PassEnum>(p));
if (tessdata_manager.SeekToStart(TESSDATA_PARAMS_MODEL)) {
if (!language_model_->getParamsModel().LoadFromFp(
lang.string(), tessdata_manager.GetDataFilePtr(),
tessdata_manager.GetEndOffset(TESSDATA_PARAMS_MODEL))) {
return false;
}
}
}
if (tessdata_manager_debug_level) language_model_->getParamsModel().Print();
return true;
}
// Helper returns true if the given string is in the vector of strings.
static bool IsStrInList(const STRING& str,
const GenericVector<STRING>& str_list) {
for (int i = 0; i < str_list.size(); ++i) {
if (str_list[i] == str)
return true;
}
return false;
}
// Parse a string of the form [~]<lang>[+[~]<lang>]*.
// Langs with no prefix get appended to to_load, provided they
// are not in there already.
// Langs with ~ prefix get appended to not_to_load, provided they are not in
// there already.
void Tesseract::ParseLanguageString(const char* lang_str,
GenericVector<STRING>* to_load,
GenericVector<STRING>* not_to_load) {
STRING remains(lang_str);
while (remains.length() > 0) {
// Find the start of the lang code and which vector to add to.
const char* start = remains.string();
while (*start == '+')
++start;
GenericVector<STRING>* target = to_load;
if (*start == '~') {
target = not_to_load;
++start;
}
// Find the index of the end of the lang code in string start.
int end = strlen(start);
const char* plus = strchr(start, '+');
if (plus != NULL && plus - start < end)
end = plus - start;
STRING lang_code(start);
lang_code.truncate_at(end);
STRING next(start + end);
remains = next;
// Check whether lang_code is already in the target vector and add.
if (!IsStrInList(lang_code, *target)) {
if (tessdata_manager_debug_level)
tprintf("Adding language '%s' to list\n", lang_code.string());
target->push_back(lang_code);
}
}
}
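// A minimal worked example of the parsing above: ParseLanguageString on
// "eng+~fra+deu" appends "eng" and "deu" to *to_load and "fra" to
// *not_to_load; a repeated code such as "eng+eng" is added only once.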
// Initialize for potentially a set of languages defined by the language
// string and recursively any additional languages required by any language
// traineddata file (via tessedit_load_sublangs in its config) that is loaded.
// See init_tesseract_internal for args.
int Tesseract::init_tesseract(
const char *arg0, const char *textbase, const char *language,
OcrEngineMode oem, char **configs, int configs_size,
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
bool set_only_non_debug_params) {
GenericVector<STRING> langs_to_load;
GenericVector<STRING> langs_not_to_load;
ParseLanguageString(language, &langs_to_load, &langs_not_to_load);
sub_langs_.delete_data_pointers();
sub_langs_.clear();
// Find the first loadable lang and load into this.
// Add any languages that this language requires
bool loaded_primary = false;
// Load the rest into sub_langs_.
for (int lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) {
if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) {
const char *lang_str = langs_to_load[lang_index].string();
Tesseract *tess_to_init;
if (!loaded_primary) {
tess_to_init = this;
}
else {
tess_to_init = new Tesseract;
}
int result = tess_to_init->init_tesseract_internal(
arg0, textbase, lang_str, oem, configs, configs_size,
vars_vec, vars_values, set_only_non_debug_params);
if (!loaded_primary) {
if (result < 0) {
tprintf("Failed loading language '%s'\n", lang_str);
}
else {
if (tessdata_manager_debug_level)
tprintf("Loaded language '%s' as main language\n", lang_str);
ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
&langs_to_load, &langs_not_to_load);
loaded_primary = true;
}
}
else {
if (result < 0) {
tprintf("Failed loading language '%s'\n", lang_str);
delete tess_to_init;
}
else {
if (tessdata_manager_debug_level)
tprintf("Loaded language '%s' as secondary language\n", lang_str);
sub_langs_.push_back(tess_to_init);
// Add any languages that this language requires
ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
&langs_to_load, &langs_not_to_load);
}
}
}
}
if (!loaded_primary) {
tprintf("Tesseract couldn't load any languages!\n");
return -1; // Couldn't load any language!
}
if (!sub_langs_.empty()) {
// In multilingual mode word ratings have to be directly comparable,
// so use the same language model weights for all languages:
// use the primary language's params model if
// tessedit_use_primary_params_model is set,
// otherwise use default language model weights.
if (tessedit_use_primary_params_model) {
for (int s = 0; s < sub_langs_.size(); ++s) {
sub_langs_[s]->language_model_->getParamsModel().Copy(
this->language_model_->getParamsModel());
}
tprintf("Using params model of the primary language\n");
if (tessdata_manager_debug_level) {
this->language_model_->getParamsModel().Print();
}
}
else {
this->language_model_->getParamsModel().Clear();
for (int s = 0; s < sub_langs_.size(); ++s) {
sub_langs_[s]->language_model_->getParamsModel().Clear();
}
if (tessdata_manager_debug_level)
tprintf("Using default language params\n");
}
}
SetupUniversalFontIds();
return 0;
}
// Common initialization for a single language.
// arg0 is the datapath for the tessdata directory, which could be the
// path of the tessdata directory with no trailing /, or (if tessdata
// lives in the same directory as the executable) the path of the executable,
// hence the name arg0.
// textbase is an optional output file basename (used only for training)
// language is the language code to load.
// oem controls which engine(s) will operate on the image
// configs (argv) is an array of config filenames to load variables from.
// May be NULL.
// configs_size (argc) is the number of elements in configs.
// vars_vec is an optional vector of variables to set.
// vars_values is an optional corresponding vector of values for the variables
// in vars_vec.
// If set_only_non_debug_params is true, only non-debug parameters will be
// set.
int Tesseract::init_tesseract_internal(
const char *arg0, const char *textbase, const char *language,
OcrEngineMode oem, char **configs, int configs_size,
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
bool set_only_non_debug_params) {
if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs,
configs_size, vars_vec, vars_values,
set_only_non_debug_params)) {
return -1;
}
if (tessedit_init_config_only) {
tessdata_manager.End();
return 0;
}
// If only Cube will be used, skip loading Tesseract classifier's
// pre-trained templates.
bool init_tesseract_classifier =
(tessedit_ocr_engine_mode == OEM_TESSERACT_ONLY ||
tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED);
// If only Cube will be used and if it has its own Unicharset,
// skip initializing permuter and loading Tesseract Dawgs.
bool init_dict =
!(tessedit_ocr_engine_mode == OEM_CUBE_ONLY &&
tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET));
program_editup(textbase, init_tesseract_classifier, init_dict);
tessdata_manager.End();
return 0; //Normal exit
}
// Helper builds the all_fonts table by adding new fonts from new_fonts.
static void CollectFonts(const UnicityTable<FontInfo>& new_fonts,
UnicityTable<FontInfo>* all_fonts) {
for (int i = 0; i < new_fonts.size(); ++i) {
// UnicityTable uniques as we go.
all_fonts->push_back(new_fonts.get(i));
}
}
// Helper assigns an id to lang_fonts using the index in all_fonts table.
static void AssignIds(const UnicityTable<FontInfo>& all_fonts,
UnicityTable<FontInfo>* lang_fonts) {
for (int i = 0; i < lang_fonts->size(); ++i) {
int index = all_fonts.get_id(lang_fonts->get(i));
lang_fonts->get_mutable(i)->universal_id = index;
}
}
// Set the universal_id member of each font to be unique among all
// instances of the same font loaded.
void Tesseract::SetupUniversalFontIds() {
// Note that we can get away with bitwise copying FontInfo in
// all_fonts, as it is a temporary structure and we avoid setting the
// delete callback.
UnicityTable<FontInfo> all_fonts;
all_fonts.set_compare_callback(NewPermanentTessCallback(CompareFontInfo));
// Create the universal ID table.
CollectFonts(get_fontinfo_table(), &all_fonts);
for (int i = 0; i < sub_langs_.size(); ++i) {
CollectFonts(sub_langs_[i]->get_fontinfo_table(), &all_fonts);
}
// Assign ids from the table to each font table.
AssignIds(all_fonts, &get_fontinfo_table());
for (int i = 0; i < sub_langs_.size(); ++i) {
AssignIds(all_fonts, &sub_langs_[i]->get_fontinfo_table());
}
font_table_size_ = all_fonts.size();
}
// init the LM component
int Tesseract::init_tesseract_lm(const char *arg0,
const char *textbase,
const char *language) {
if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY,
NULL, 0, NULL, NULL, false))
return -1;
getDict().SetupForLoad(Dict::GlobalDawgCache());
getDict().Load(tessdata_manager.GetDataFileName().string(), lang);
getDict().FinishLoad();
tessdata_manager.End();
return 0;
}
void Tesseract::end_tesseract() {
end_recog();
}
/* Define command type identifiers */
enum CMD_EVENTS
{
ACTION_1_CMD_EVENT,
RECOG_WERDS,
RECOG_PSEUDO,
ACTION_2_CMD_EVENT
};
} // namespace tesseract

View File

@ -0,0 +1,29 @@
/**********************************************************************
* File: tessedit.h (Formerly tessedit.h)
* Description: Main program for merge of tess and editor.
* Author: Ray Smith
* Created: Tue Jan 07 15:21:46 GMT 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSEDIT_H
#define TESSEDIT_H
#include "blobs.h"
#include "pgedit.h"
//progress monitor
extern ETEXT_DESC *global_monitor;
#endif

View File

@ -0,0 +1,305 @@
/**********************************************************************
 * File:        tesseract_cube_combiner.cpp
 * Description: Implementation of the Tesseract & Cube results combiner Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The TesseractCubeCombiner class provides the functionality of combining
// the recognition results of Tesseract and Cube at the word level
#include <algorithm>
#include <wctype.h>
#include "tesseract_cube_combiner.h"
#include "cube_object.h"
#include "cube_reco_context.h"
#include "cube_utils.h"
#include "neural_net.h"
#include "tesseractclass.h"
#include "word_altlist.h"
namespace tesseract {
TesseractCubeCombiner::TesseractCubeCombiner(CubeRecoContext *cube_cntxt) {
cube_cntxt_ = cube_cntxt;
combiner_net_ = NULL;
}
TesseractCubeCombiner::~TesseractCubeCombiner() {
if (combiner_net_ != NULL) {
delete combiner_net_;
combiner_net_ = NULL;
}
}
bool TesseractCubeCombiner::LoadCombinerNet() {
ASSERT_HOST(cube_cntxt_);
// Compute the path of the combiner net
string data_path;
cube_cntxt_->GetDataFilePath(&data_path);
string net_file_name = data_path + cube_cntxt_->Lang() +
".tesseract_cube.nn";
// Return false if file does not exist
FILE *fp = fopen(net_file_name.c_str(), "rb");
if (fp == NULL)
return false;
else
fclose(fp);
// Load and validate net
combiner_net_ = NeuralNet::FromFile(net_file_name);
if (combiner_net_ == NULL) {
tprintf("Could not read combiner net file %s", net_file_name.c_str());
return false;
}
else if (combiner_net_->out_cnt() != 2) {
tprintf("Invalid combiner net file %s! Output count != 2\n",
net_file_name.c_str());
delete combiner_net_;
combiner_net_ = NULL;
return false;
}
return true;
}
// Normalize a UTF-8 string. Converts the UTF-8 string to UTF32 and optionally
// strips punc and/or normalizes case and then converts back
string TesseractCubeCombiner::NormalizeString(const string &str,
bool remove_punc,
bool norm_case) {
// convert to UTF32
string_32 str32;
CubeUtils::UTF8ToUTF32(str.c_str(), &str32);
// strip punc and normalize
string_32 new_str32;
for (int idx = 0; idx < str32.length(); idx++) {
// if no punc removal is required or not a punctuation character
if (!remove_punc || iswpunct(str32[idx]) == 0) {
char_32 norm_char = str32[idx];
// normalize case if required
if (norm_case && iswalpha(norm_char)) {
norm_char = towlower(norm_char);
}
new_str32.push_back(norm_char);
}
}
// convert back to UTF8
string new_str;
CubeUtils::UTF32ToUTF8(new_str32.c_str(), &new_str);
return new_str;
}
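// A minimal example of the normalization above, with a hypothetical input:
// NormalizeString("Foo, Bar!", true, true) strips the comma and exclamation
// mark, lower-cases the letters, keeps the space, and returns "foo bar".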
// Compares 2 strings optionally ignoring punctuation
int TesseractCubeCombiner::CompareStrings(const string &str1,
const string &str2,
bool ignore_punc,
bool ignore_case) {
if (!ignore_punc && !ignore_case) {
return str1.compare(str2);
}
string norm_str1 = NormalizeString(str1, ignore_punc, ignore_case);
string norm_str2 = NormalizeString(str2, ignore_punc, ignore_case);
return norm_str1.compare(norm_str2);
}
// Check if a string is a valid Tess dict word or not
bool TesseractCubeCombiner::ValidWord(const string &str) {
return (cube_cntxt_->TesseractObject()->getDict().valid_word(str.c_str())
> 0);
}
// Public method for computing the combiner features. The agreement
// output parameter will be true if both answers are identical,
// and false otherwise.
bool TesseractCubeCombiner::ComputeCombinerFeatures(const string &tess_str,
int tess_confidence,
CubeObject *cube_obj,
WordAltList *cube_alt_list,
vector<double> *features,
bool *agreement) {
features->clear();
*agreement = false;
if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0)
return false;
// Get Cube's best string; return false if empty
char_32 *cube_best_str32 = cube_alt_list->Alt(0);
if (cube_best_str32 == NULL || CubeUtils::StrLen(cube_best_str32) < 1)
return false;
string cube_best_str;
int cube_best_cost = cube_alt_list->AltCost(0);
int cube_best_bigram_cost = 0;
bool cube_best_bigram_cost_valid = true;
if (cube_cntxt_->Bigrams())
cube_best_bigram_cost = cube_cntxt_->Bigrams()->
Cost(cube_best_str32, cube_cntxt_->CharacterSet());
else
cube_best_bigram_cost_valid = false;
CubeUtils::UTF32ToUTF8(cube_best_str32, &cube_best_str);
// Get Tesseract's UTF32 string
string_32 tess_str32;
CubeUtils::UTF8ToUTF32(tess_str.c_str(), &tess_str32);
// Compute agreement flag
*agreement = (tess_str.compare(cube_best_str) == 0);
// Get Cube's second best string; if empty, return false
char_32 *cube_next_best_str32;
string cube_next_best_str;
int cube_next_best_cost = WORST_COST;
if (cube_alt_list->AltCount() > 1) {
cube_next_best_str32 = cube_alt_list->Alt(1);
if (cube_next_best_str32 == NULL ||
CubeUtils::StrLen(cube_next_best_str32) == 0) {
return false;
}
cube_next_best_cost = cube_alt_list->AltCost(1);
CubeUtils::UTF32ToUTF8(cube_next_best_str32, &cube_next_best_str);
}
// Rank of Tesseract's top result in Cube's alternate list
int tess_rank = 0;
for (tess_rank = 0; tess_rank < cube_alt_list->AltCount(); tess_rank++) {
string alt_str;
CubeUtils::UTF32ToUTF8(cube_alt_list->Alt(tess_rank), &alt_str);
if (alt_str == tess_str)
break;
}
// Cube's cost for tesseract's result. Note that this modifies the
// state of cube_obj, including its alternate list by calling RecognizeWord()
int tess_cost = cube_obj->WordCost(tess_str.c_str());
// Cube's bigram cost of Tesseract's string
int tess_bigram_cost = 0;
int tess_bigram_cost_valid = true;
if (cube_cntxt_->Bigrams())
tess_bigram_cost = cube_cntxt_->Bigrams()->
Cost(tess_str32.c_str(), cube_cntxt_->CharacterSet());
else
tess_bigram_cost_valid = false;
// Tesseract confidence
features->push_back(tess_confidence);
// Cube cost of Tesseract string
features->push_back(tess_cost);
// Cube Rank of Tesseract string
features->push_back(tess_rank);
// length of Tesseract OCR string
features->push_back(tess_str.length());
// Tesseract OCR string in dictionary
features->push_back(ValidWord(tess_str));
if (tess_bigram_cost_valid) {
// bigram cost of Tesseract string
features->push_back(tess_bigram_cost);
}
// Cube tess_cost of Cube best string
features->push_back(cube_best_cost);
// Cube tess_cost of Cube next best string
features->push_back(cube_next_best_cost);
// length of Cube string
features->push_back(cube_best_str.length());
// Cube string in dictionary
features->push_back(ValidWord(cube_best_str));
if (cube_best_bigram_cost_valid) {
// bigram cost of Cube string
features->push_back(cube_best_bigram_cost);
}
// case-insensitive string comparison, including punctuation
int compare_nocase_punc = CompareStrings(cube_best_str,
tess_str, false, true);
features->push_back(compare_nocase_punc == 0);
// case-sensitive string comparison, ignoring punctuation
int compare_case_nopunc = CompareStrings(cube_best_str,
tess_str, true, false);
features->push_back(compare_case_nopunc == 0);
// case-insensitive string comparison, ignoring punctuation
int compare_nocase_nopunc = CompareStrings(cube_best_str,
tess_str, true, true);
features->push_back(compare_nocase_nopunc == 0);
return true;
}
// The CubeObject parameter is used for 2 purposes: 1) to retrieve
// cube's alt list, and 2) to compute cube's word cost for the
// tesseract result. The call to CubeObject::WordCost() modifies
// the object's alternate list, so previous state will be lost.
float TesseractCubeCombiner::CombineResults(WERD_RES *tess_res,
CubeObject *cube_obj) {
// If no combiner is loaded or the cube object is undefined,
// tesseract wins with probability 1.0
if (combiner_net_ == NULL || cube_obj == NULL) {
tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
"Cube objects not initialized; defaulting to Tesseract\n");
return 1.0;
}
// Retrieve the alternate list from the CubeObject's current state.
// If the alt list empty, tesseract wins with probability 1.0
WordAltList *cube_alt_list = cube_obj->AlternateList();
if (cube_alt_list == NULL)
cube_alt_list = cube_obj->RecognizeWord();
if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0) {
tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
"Cube returned no results; defaulting to Tesseract\n");
return 1.0;
}
return CombineResults(tess_res, cube_obj, cube_alt_list);
}
// The alt_list parameter is expected to have been extracted from the
// CubeObject that recognized the word to be combined. The cube_obj
// parameter passed may be either same instance or a separate instance to
// be used only by the combiner. In both cases, its alternate
// list will be modified by an internal call to RecognizeWord().
float TesseractCubeCombiner::CombineResults(WERD_RES *tess_res,
CubeObject *cube_obj,
WordAltList *cube_alt_list) {
// If no combiner is loaded or the cube object is undefined, or the
// alt list is empty, tesseract wins with probability 1.0
if (combiner_net_ == NULL || cube_obj == NULL ||
cube_alt_list == NULL || cube_alt_list->AltCount() <= 0) {
tprintf("Cube WARNING (TesseractCubeCombiner::CombineResults): "
"Cube result cannot be retrieved; defaulting to Tesseract\n");
return 1.0;
}
// Tesseract result string, tesseract confidence, and cost of
// tesseract result according to cube
string tess_str = tess_res->best_choice->unichar_string().string();
// Map certainty [-20.0, 0.0] to confidence [0, 100]
int tess_confidence = MIN(100, MAX(1, static_cast<int>(
100 + (5 * tess_res->best_choice->certainty()))));
// Compute the combiner features. If feature computation fails or
// answers are identical, tesseract wins with probability 1.0
vector<double> features;
bool agreement;
bool combiner_success = ComputeCombinerFeatures(tess_str, tess_confidence,
cube_obj, cube_alt_list,
&features, &agreement);
if (!combiner_success || agreement)
return 1.0;
// Classify combiner feature vector and return output (probability
// of tesseract class).
double net_out[2];
if (!combiner_net_->FeedForward(&features[0], net_out))
return 1.0;
return net_out[1];
}
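// A minimal numeric example of the certainty-to-confidence mapping above:
// a Tesseract certainty of -8.0 maps to 100 + 5 * (-8.0) == 60, a certainty
// of 0.0 clamps to the ceiling of 100, and certainties near -20.0 clamp to
// the floor of 1.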
}

View File

@ -0,0 +1,93 @@
/**********************************************************************
* File: tesseract_cube_combiner.h
* Description: Declaration of the Tesseract & Cube results combiner Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The TesseractCubeCombiner class provides the functionality of combining
// the recognition results of Tesseract and Cube at the word level
#ifndef TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H
#define TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H
#include <string>
#include <vector>
#include "pageres.h"
namespace tesseract {
class CubeObject;
class NeuralNet;
class CubeRecoContext;
class WordAltList;
class TesseractCubeCombiner {
public:
explicit TesseractCubeCombiner(CubeRecoContext *cube_cntxt);
virtual ~TesseractCubeCombiner();
// There are 2 public methods for combining the results of tesseract
// and cube. Both return the probability that the Tesseract result is
// correct. The difference between the two interfaces is in how the
// passed-in CubeObject is used.
// The CubeObject parameter is used for 2 purposes: 1) to retrieve
// cube's alt list, and 2) to compute cube's word cost for the
// tesseract result. Both uses may modify the state of the
// CubeObject (including the BeamSearch state) with a call to
// RecognizeWord().
float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj);
// The alt_list parameter is expected to have been extracted from the
// CubeObject that recognized the word to be combined. The cube_obj
// parameter passed in is a separate instance to be used only by
// the combiner.
float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj,
WordAltList *alt_list);
// Public method for computing the combiner features. The agreement
// output parameter will be true if both answers are identical,
// false otherwise. Modifies the cube_alt_list, so no assumptions
// should be made about its state upon return.
bool ComputeCombinerFeatures(const std::string &tess_res,
int tess_confidence,
CubeObject *cube_obj,
WordAltList *cube_alt_list,
std::vector<double> *features,
bool *agreement);
// Is the word valid according to Tesseract's language model
bool ValidWord(const std::string &str);
// Loads the combiner neural network from file, using cube_cntxt_
// to find path.
bool LoadCombinerNet();
private:
// Normalize a UTF-8 string. Converts the UTF-8 string to UTF32 and optionally
// strips punc and/or normalizes case and then converts back
std::string NormalizeString(const std::string &str, bool remove_punc, bool norm_case);
// Compares 2 strings after optionally normalizing them and or stripping
// punctuation
int CompareStrings(const std::string &str1, const std::string &str2, bool ignore_punc,
bool norm_case);
NeuralNet *combiner_net_; // pointer to the combiner NeuralNet object
CubeRecoContext *cube_cntxt_; // used for language ID and data paths
};
}
#endif // TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H

View File

@ -0,0 +1,769 @@
///////////////////////////////////////////////////////////////////////
// File: tesseractclass.cpp
// Description: The Tesseract class. It holds/owns everything needed
// to run Tesseract on a single language, and also a set of
// sub-Tesseracts to run sub-languages. For thread safety, *every*
// variable that was previously global or static (except for
// constant data, and some visual debugging flags) has been moved
// in here, directly, or indirectly.
// This makes it safe to run multiple Tesseracts in different
// threads in parallel, and keeps the different language
// instances separate.
// Some global functions remain, but they are isolated re-entrant
// functions that operate on their arguments. Functions that work
// on variable data have been moved to an appropriate class based
// mostly on the directory hierarchy. For more information see
// slide 6 of "2ArchitectureAndDataStructures" in
// https://drive.google.com/file/d/0B7l10Bj_LprhbUlIUFlCdGtDYkE/edit?usp=sharing
// Some global data and related functions still exist in the
// training-related code, but they don't interfere with normal
// recognition operation.
// Author: Ray Smith
// Created: Fri Mar 07 08:17:01 PST 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#include "tesseractclass.h"
#include "allheaders.h"
#ifndef NO_CUBE_BUILD
#include "cube_reco_context.h"
#endif
#include "edgblob.h"
#include "equationdetect.h"
#include "globals.h"
#ifndef NO_CUBE_BUILD
#include "tesseract_cube_combiner.h"
#endif
namespace tesseract {
Tesseract::Tesseract()
: BOOL_MEMBER(tessedit_resegment_from_boxes, false,
"Take segmentation and labeling from box file",
this->params()),
BOOL_MEMBER(tessedit_resegment_from_line_boxes, false,
"Conversion of word/line box file to char box file",
this->params()),
BOOL_MEMBER(tessedit_train_from_boxes, false,
"Generate training data from boxed chars", this->params()),
BOOL_MEMBER(tessedit_make_boxes_from_boxes, false,
"Generate more boxes from boxed chars", this->params()),
BOOL_MEMBER(tessedit_dump_pageseg_images, false,
"Dump intermediate images made during page segmentation",
this->params()),
// The default for pageseg_mode is the old behaviour, so as not to
// upset anything that relies on that.
INT_MEMBER(
tessedit_pageseg_mode, PSM_SINGLE_BLOCK,
"Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block,"
" 5=line, 6=word, 7=char"
" (Values from PageSegMode enum in publictypes.h)",
this->params()),
INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY,
"Which OCR engine(s) to run (Tesseract, Cube, both)."
" Defaults to loading and running only Tesseract"
" (no Cube,no combiner)."
" Values from OcrEngineMode enum in tesseractclass.h)",
this->params()),
STRING_MEMBER(tessedit_char_blacklist, "",
"Blacklist of chars not to recognize", this->params()),
STRING_MEMBER(tessedit_char_whitelist, "",
"Whitelist of chars to recognize", this->params()),
STRING_MEMBER(tessedit_char_unblacklist, "",
"List of chars to override tessedit_char_blacklist",
this->params()),
BOOL_MEMBER(tessedit_ambigs_training, false,
"Perform training for ambiguities", this->params()),
INT_MEMBER(pageseg_devanagari_split_strategy,
tesseract::ShiroRekhaSplitter::NO_SPLIT,
"Whether to use the top-line splitting process for Devanagari "
"documents while performing page-segmentation.",
this->params()),
INT_MEMBER(ocr_devanagari_split_strategy,
tesseract::ShiroRekhaSplitter::NO_SPLIT,
"Whether to use the top-line splitting process for Devanagari "
"documents while performing ocr.",
this->params()),
STRING_MEMBER(tessedit_write_params_to_file, "",
"Write all parameters to the given file.", this->params()),
BOOL_MEMBER(tessedit_adaption_debug, false,
"Generate and print debug"
" information for adaption",
this->params()),
INT_MEMBER(bidi_debug, 0, "Debug level for BiDi", this->params()),
INT_MEMBER(applybox_debug, 1, "Debug level", this->params()),
INT_MEMBER(applybox_page, 0, "Page number to apply boxes from",
this->params()),
STRING_MEMBER(applybox_exposure_pattern, ".exp",
"Exposure value follows"
" this pattern in the image filename. The name of the image"
" files are expected to be in the form"
" [lang].[fontname].exp[num].tif",
this->params()),
BOOL_MEMBER(applybox_learn_chars_and_char_frags_mode, false,
"Learn both character fragments (as is done in the"
" special low exposure mode) as well as unfragmented"
" characters.",
this->params()),
BOOL_MEMBER(applybox_learn_ngrams_mode, false,
"Each bounding box"
" is assumed to contain ngrams. Only learn the ngrams"
" whose outlines overlap horizontally.",
this->params()),
BOOL_MEMBER(tessedit_display_outwords, false, "Draw output words",
this->params()),
BOOL_MEMBER(tessedit_dump_choices, false, "Dump char choices",
this->params()),
BOOL_MEMBER(tessedit_timing_debug, false, "Print timing stats",
this->params()),
BOOL_MEMBER(tessedit_fix_fuzzy_spaces, true,
"Try to improve fuzzy spaces", this->params()),
BOOL_MEMBER(tessedit_unrej_any_wd, false,
"Don't bother with word plausibility", this->params()),
BOOL_MEMBER(tessedit_fix_hyphens, true, "Crunch double hyphens?",
this->params()),
BOOL_MEMBER(tessedit_redo_xheight, true, "Check/Correct x-height",
this->params()),
BOOL_MEMBER(tessedit_enable_doc_dict, true,
"Add words to the document dictionary", this->params()),
BOOL_MEMBER(tessedit_debug_fonts, false, "Output font info per char",
this->params()),
BOOL_MEMBER(tessedit_debug_block_rejection, false, "Block and Row stats",
this->params()),
BOOL_MEMBER(tessedit_enable_bigram_correction, true,
"Enable correction based on the word bigram dictionary.",
this->params()),
BOOL_MEMBER(tessedit_enable_dict_correction, false,
"Enable single word correction based on the dictionary.",
this->params()),
INT_MEMBER(tessedit_bigram_debug, 0,
"Amount of debug output for bigram correction.",
this->params()),
BOOL_MEMBER(enable_noise_removal, true,
"Remove and conditionally reassign small outlines when they"
" confuse layout analysis, determining diacritics vs noise",
this->params()),
INT_MEMBER(debug_noise_removal, 0, "Debug reassignment of small outlines",
this->params()),
      // Worst (min) certainty, for which a diacritic is allowed to make the
      // base character worse and still be included.
double_MEMBER(noise_cert_basechar, -8.0,
"Hingepoint for base char certainty", this->params()),
// Worst (min) certainty, for which a non-overlapping diacritic is allowed
// to make the base character worse and still be included.
double_MEMBER(noise_cert_disjoint, -1.0,
"Hingepoint for disjoint certainty", this->params()),
// Worst (min) certainty, for which a diacritic is allowed to make a new
// stand-alone blob.
double_MEMBER(noise_cert_punc, -3.0,
"Threshold for new punc char certainty", this->params()),
// Factor of certainty margin for adding diacritics to not count as worse.
double_MEMBER(noise_cert_factor, 0.375,
"Scaling on certainty diff from Hingepoint",
this->params()),
INT_MEMBER(noise_maxperblob, 8, "Max diacritics to apply to a blob",
this->params()),
INT_MEMBER(noise_maxperword, 16, "Max diacritics to apply to a word",
this->params()),
INT_MEMBER(debug_x_ht_level, 0, "Reestimate debug", this->params()),
BOOL_MEMBER(debug_acceptable_wds, false, "Dump word pass/fail chk",
this->params()),
STRING_MEMBER(chs_leading_punct, "('`\"", "Leading punctuation",
this->params()),
STRING_MEMBER(chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation",
this->params()),
STRING_MEMBER(chs_trailing_punct2, ")'`\"", "2nd Trailing punctuation",
this->params()),
double_MEMBER(quality_rej_pc, 0.08,
"good_quality_doc lte rejection limit", this->params()),
double_MEMBER(quality_blob_pc, 0.0,
"good_quality_doc gte good blobs limit", this->params()),
double_MEMBER(quality_outline_pc, 1.0,
"good_quality_doc lte outline error limit", this->params()),
double_MEMBER(quality_char_pc, 0.95,
"good_quality_doc gte good char limit", this->params()),
INT_MEMBER(quality_min_initial_alphas_reqd, 2, "alphas in a good word",
this->params()),
INT_MEMBER(tessedit_tess_adaption_mode, 0x27,
"Adaptation decision algorithm for tess", this->params()),
BOOL_MEMBER(tessedit_minimal_rej_pass1, false,
"Do minimal rejection on pass 1 output", this->params()),
BOOL_MEMBER(tessedit_test_adaption, false, "Test adaption criteria",
this->params()),
BOOL_MEMBER(tessedit_matcher_log, false, "Log matcher activity",
this->params()),
INT_MEMBER(tessedit_test_adaption_mode, 3,
"Adaptation decision algorithm for tess", this->params()),
BOOL_MEMBER(test_pt, false, "Test for point", this->params()),
double_MEMBER(test_pt_x, 99999.99, "xcoord", this->params()),
double_MEMBER(test_pt_y, 99999.99, "ycoord", this->params()),
INT_MEMBER(paragraph_debug_level, 0, "Print paragraph debug info.",
this->params()),
BOOL_MEMBER(paragraph_text_based, true,
"Run paragraph detection on the post-text-recognition "
"(more accurate)",
this->params()),
INT_MEMBER(cube_debug_level, 0, "Print cube debug info.", this->params()),
STRING_MEMBER(outlines_odd, "%| ", "Non standard number of outlines",
this->params()),
STRING_MEMBER(outlines_2, "ij!?%\":;", "Non standard number of outlines",
this->params()),
BOOL_MEMBER(docqual_excuse_outline_errs, false,
"Allow outline errs in unrejection?", this->params()),
BOOL_MEMBER(tessedit_good_quality_unrej, true,
"Reduce rejection on good docs", this->params()),
BOOL_MEMBER(tessedit_use_reject_spaces, true, "Reject spaces?",
this->params()),
double_MEMBER(tessedit_reject_doc_percent, 65.00,
"%rej allowed before rej whole doc", this->params()),
double_MEMBER(tessedit_reject_block_percent, 45.00,
"%rej allowed before rej whole block", this->params()),
double_MEMBER(tessedit_reject_row_percent, 40.00,
"%rej allowed before rej whole row", this->params()),
double_MEMBER(tessedit_whole_wd_rej_row_percent, 70.00,
"Number of row rejects in whole word rejects"
"which prevents whole row rejection",
this->params()),
BOOL_MEMBER(tessedit_preserve_blk_rej_perfect_wds, true,
"Only rej partially rejected words in block rejection",
this->params()),
BOOL_MEMBER(tessedit_preserve_row_rej_perfect_wds, true,
"Only rej partially rejected words in row rejection",
this->params()),
BOOL_MEMBER(tessedit_dont_blkrej_good_wds, false,
"Use word segmentation quality metric", this->params()),
BOOL_MEMBER(tessedit_dont_rowrej_good_wds, false,
"Use word segmentation quality metric", this->params()),
INT_MEMBER(tessedit_preserve_min_wd_len, 2,
"Only preserve wds longer than this", this->params()),
BOOL_MEMBER(tessedit_row_rej_good_docs, true,
"Apply row rejection to good docs", this->params()),
double_MEMBER(tessedit_good_doc_still_rowrej_wd, 1.1,
"rej good doc wd if more than this fraction rejected",
this->params()),
BOOL_MEMBER(tessedit_reject_bad_qual_wds, true,
"Reject all bad quality wds", this->params()),
BOOL_MEMBER(tessedit_debug_doc_rejection, false, "Page stats",
this->params()),
BOOL_MEMBER(tessedit_debug_quality_metrics, false,
"Output data to debug file", this->params()),
      BOOL_MEMBER(bland_unrej, false, "unrej potential with no checks",
this->params()),
double_MEMBER(quality_rowrej_pc, 1.1,
"good_quality_doc gte good char limit", this->params()),
BOOL_MEMBER(unlv_tilde_crunching, true,
"Mark v.bad words for tilde crunch", this->params()),
BOOL_MEMBER(hocr_font_info, false, "Add font info to hocr output",
this->params()),
BOOL_MEMBER(crunch_early_merge_tess_fails, true, "Before word crunch?",
this->params()),
BOOL_MEMBER(crunch_early_convert_bad_unlv_chs, false,
"Take out ~^ early?", this->params()),
double_MEMBER(crunch_terrible_rating, 80.0, "crunch rating lt this",
this->params()),
BOOL_MEMBER(crunch_terrible_garbage, true, "As it says", this->params()),
double_MEMBER(crunch_poor_garbage_cert, -9.0,
"crunch garbage cert lt this", this->params()),
double_MEMBER(crunch_poor_garbage_rate, 60,
"crunch garbage rating lt this", this->params()),
double_MEMBER(crunch_pot_poor_rate, 40, "POTENTIAL crunch rating lt this",
this->params()),
double_MEMBER(crunch_pot_poor_cert, -8.0, "POTENTIAL crunch cert lt this",
this->params()),
BOOL_MEMBER(crunch_pot_garbage, true, "POTENTIAL crunch garbage",
this->params()),
double_MEMBER(crunch_del_rating, 60, "POTENTIAL crunch rating lt this",
this->params()),
double_MEMBER(crunch_del_cert, -10.0, "POTENTIAL crunch cert lt this",
this->params()),
double_MEMBER(crunch_del_min_ht, 0.7, "Del if word ht lt xht x this",
this->params()),
double_MEMBER(crunch_del_max_ht, 3.0, "Del if word ht gt xht x this",
this->params()),
double_MEMBER(crunch_del_min_width, 3.0,
"Del if word width lt xht x this", this->params()),
double_MEMBER(crunch_del_high_word, 1.5,
"Del if word gt xht x this above bl", this->params()),
double_MEMBER(crunch_del_low_word, 0.5,
"Del if word gt xht x this below bl", this->params()),
double_MEMBER(crunch_small_outlines_size, 0.6, "Small if lt xht x this",
this->params()),
INT_MEMBER(crunch_rating_max, 10, "For adj length in rating per ch",
this->params()),
INT_MEMBER(crunch_pot_indicators, 1,
"How many potential indicators needed", this->params()),
BOOL_MEMBER(crunch_leave_ok_strings, true, "Don't touch sensible strings",
this->params()),
BOOL_MEMBER(crunch_accept_ok, true, "Use acceptability in okstring",
this->params()),
BOOL_MEMBER(crunch_leave_accept_strings, false,
"Don't pot crunch sensible strings", this->params()),
BOOL_MEMBER(crunch_include_numerals, false, "Fiddle alpha figures",
this->params()),
INT_MEMBER(crunch_leave_lc_strings, 4,
"Don't crunch words with long lower case strings",
this->params()),
INT_MEMBER(crunch_leave_uc_strings, 4,
"Don't crunch words with long lower case strings",
this->params()),
INT_MEMBER(crunch_long_repetitions, 3,
"Crunch words with long repetitions", this->params()),
INT_MEMBER(crunch_debug, 0, "As it says", this->params()),
INT_MEMBER(fixsp_non_noise_limit, 1,
"How many non-noise blbs either side?", this->params()),
double_MEMBER(fixsp_small_outlines_size, 0.28, "Small if lt xht x this",
this->params()),
BOOL_MEMBER(tessedit_prefer_joined_punct, false,
"Reward punctation joins", this->params()),
      INT_MEMBER(fixsp_done_mode, 1, "What constitutes done for spacing",
this->params()),
INT_MEMBER(debug_fix_space_level, 0, "Contextual fixspace debug",
this->params()),
STRING_MEMBER(numeric_punctuation, ".,",
"Punct. chs expected WITHIN numbers", this->params()),
INT_MEMBER(x_ht_acceptance_tolerance, 8,
"Max allowed deviation of blob top outside of font data",
this->params()),
INT_MEMBER(x_ht_min_change, 8,
"Min change in xht before actually trying it", this->params()),
INT_MEMBER(superscript_debug, 0,
"Debug level for sub & superscript fixer", this->params()),
double_MEMBER(
superscript_worse_certainty, 2.0,
"How many times worse "
"certainty does a superscript position glyph need to be for "
"us to try classifying it as a char with a different "
"baseline?",
this->params()),
double_MEMBER(
superscript_bettered_certainty, 0.97,
"What reduction in "
"badness do we think sufficient to choose a superscript "
"over what we'd thought. For example, a value of 0.6 means "
"we want to reduce badness of certainty by at least 40%",
this->params()),
double_MEMBER(superscript_scaledown_ratio, 0.4,
"A superscript scaled down more than this is unbelievably "
"small. For example, 0.3 means we expect the font size to "
"be no smaller than 30% of the text line font size.",
this->params()),
double_MEMBER(subscript_max_y_top, 0.5,
"Maximum top of a character measured as a multiple of "
"x-height above the baseline for us to reconsider whether "
"it's a subscript.",
this->params()),
double_MEMBER(superscript_min_y_bottom, 0.3,
"Minimum bottom of a character measured as a multiple of "
"x-height above the baseline for us to reconsider whether "
"it's a superscript.",
this->params()),
BOOL_MEMBER(tessedit_write_block_separators, false,
"Write block separators in output", this->params()),
BOOL_MEMBER(tessedit_write_rep_codes, false, "Write repetition char code",
this->params()),
BOOL_MEMBER(tessedit_write_unlv, false, "Write .unlv output file",
this->params()),
BOOL_MEMBER(tessedit_create_txt, false, "Write .txt output file",
this->params()),
BOOL_MEMBER(tessedit_create_hocr, false, "Write .html hOCR output file",
this->params()),
BOOL_MEMBER(tessedit_create_tsv, false, "Write .tsv output file",
this->params()),
BOOL_MEMBER(tessedit_create_pdf, false, "Write .pdf output file",
this->params()),
BOOL_MEMBER(textonly_pdf, false, "Create PDF with only one invisible text layer",
this->params()),
STRING_MEMBER(unrecognised_char, "|",
"Output char for unidentified blobs", this->params()),
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),
INT_MEMBER(suspect_space_level, 100,
"Min suspect level for rejecting spaces", this->params()),
INT_MEMBER(suspect_short_words, 2,
"Don't suspect dict wds longer than this", this->params()),
BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected",
this->params()),
double_MEMBER(suspect_rating_per_ch, 999.9,
"Don't touch bad rating limit", this->params()),
double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit",
this->params()),
BOOL_MEMBER(tessedit_minimal_rejection, false,
"Only reject tess failures", this->params()),
BOOL_MEMBER(tessedit_zero_rejection, false, "Don't reject ANYTHING",
this->params()),
BOOL_MEMBER(tessedit_word_for_word, false,
"Make output have exactly one word per WERD", this->params()),
BOOL_MEMBER(tessedit_zero_kelvin_rejection, false,
"Don't reject ANYTHING AT ALL", this->params()),
BOOL_MEMBER(tessedit_consistent_reps, true,
"Force all rep chars the same", this->params()),
INT_MEMBER(tessedit_reject_mode, 0, "Rejection algorithm",
this->params()),
BOOL_MEMBER(tessedit_rejection_debug, false, "Adaption debug",
this->params()),
BOOL_MEMBER(tessedit_flip_0O, true, "Contextual 0O O0 flips",
this->params()),
double_MEMBER(tessedit_lower_flip_hyphen, 1.5,
"Aspect ratio dot/hyphen test", this->params()),
double_MEMBER(tessedit_upper_flip_hyphen, 1.8,
"Aspect ratio dot/hyphen test", this->params()),
BOOL_MEMBER(rej_trust_doc_dawg, false,
"Use DOC dawg in 11l conf. detector", this->params()),
BOOL_MEMBER(rej_1Il_use_dict_word, false, "Use dictword test",
this->params()),
BOOL_MEMBER(rej_1Il_trust_permuter_type, true, "Don't double check",
this->params()),
BOOL_MEMBER(rej_use_tess_accepted, true, "Individual rejection control",
this->params()),
BOOL_MEMBER(rej_use_tess_blanks, true, "Individual rejection control",
this->params()),
BOOL_MEMBER(rej_use_good_perm, true, "Individual rejection control",
this->params()),
BOOL_MEMBER(rej_use_sensible_wd, false, "Extend permuter check",
this->params()),
BOOL_MEMBER(rej_alphas_in_number_perm, false, "Extend permuter check",
this->params()),
double_MEMBER(rej_whole_of_mostly_reject_word_fract, 0.85,
"if >this fract", this->params()),
INT_MEMBER(tessedit_image_border, 2, "Rej blbs near image edge limit",
this->params()),
STRING_MEMBER(ok_repeated_ch_non_alphanum_wds, "-?*\075",
"Allow NN to unrej", this->params()),
STRING_MEMBER(conflict_set_I_l_1, "Il1[]", "Il1 conflict set",
this->params()),
INT_MEMBER(min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this",
this->params()),
BOOL_MEMBER(tessedit_create_boxfile, false, "Output text with boxes",
this->params()),
INT_MEMBER(tessedit_page_number, -1,
"-1 -> All pages"
" , else specifc page to process",
this->params()),
BOOL_MEMBER(tessedit_write_images, false,
"Capture the image from the IPE", this->params()),
BOOL_MEMBER(interactive_display_mode, false, "Run interactively?",
this->params()),
STRING_MEMBER(file_type, ".tif", "Filename extension", this->params()),
BOOL_MEMBER(tessedit_override_permuter, true, "According to dict_word",
this->params()),
INT_MEMBER(tessdata_manager_debug_level, 0,
"Debug level for"
" TessdataManager functions.",
this->params()),
STRING_MEMBER(tessedit_load_sublangs, "",
"List of languages to load with this one", this->params()),
BOOL_MEMBER(tessedit_use_primary_params_model, false,
"In multilingual mode use params model of the"
" primary language",
this->params()),
double_MEMBER(min_orientation_margin, 7.0,
"Min acceptable orientation margin", this->params()),
BOOL_MEMBER(textord_tabfind_show_vlines, false, "Debug line finding",
this->params()),
BOOL_MEMBER(textord_use_cjk_fp_model, FALSE, "Use CJK fixed pitch model",
this->params()),
BOOL_MEMBER(poly_allow_detailed_fx, false,
"Allow feature extractors to see the original outline",
this->params()),
BOOL_INIT_MEMBER(tessedit_init_config_only, false,
"Only initialize with the config file. Useful if the "
"instance is not going to be used for OCR but say only "
"for layout analysis.",
this->params()),
BOOL_MEMBER(textord_equation_detect, false, "Turn on equation detector",
this->params()),
BOOL_MEMBER(textord_tabfind_vertical_text, true,
"Enable vertical detection", this->params()),
BOOL_MEMBER(textord_tabfind_force_vertical_text, false,
"Force using vertical text page mode", this->params()),
double_MEMBER(
textord_tabfind_vertical_text_ratio, 0.5,
"Fraction of textlines deemed vertical to use vertical page "
"mode",
this->params()),
double_MEMBER(
textord_tabfind_aligned_gap_fraction, 0.75,
"Fraction of height used as a minimum gap for aligned blobs.",
this->params()),
INT_MEMBER(tessedit_parallelize, 0, "Run in parallel where possible",
this->params()),
BOOL_MEMBER(preserve_interword_spaces, false,
"Preserve multiple interword spaces", this->params()),
BOOL_MEMBER(include_page_breaks, FALSE,
"Include page separator string in output text after each "
"image/page.",
this->params()),
STRING_MEMBER(page_separator, "\f",
"Page separator (default is form feed control character)",
this->params()),
      // The following parameters were deprecated and removed from their
      // original locations. The parameters are temporarily kept here to give
      // Tesseract users a chance to update their [lang].traineddata and config files
// without introducing failures during Tesseract initialization.
// TODO(ocr-team): remove these parameters from the code once we are
// reasonably sure that Tesseract users have updated their data files.
//
// BEGIN DEPRECATED PARAMETERS
BOOL_MEMBER(textord_tabfind_vertical_horizontal_mix, true,
"find horizontal lines such as headers in vertical page mode",
this->params()),
INT_MEMBER(tessedit_ok_mode, 5, "Acceptance decision algorithm",
this->params()),
BOOL_INIT_MEMBER(load_fixed_length_dawgs, true,
"Load fixed length dawgs"
" (e.g. for non-space delimited languages)",
this->params()),
INT_MEMBER(segment_debug, 0, "Debug the whole segmentation process",
this->params()),
BOOL_MEMBER(permute_debug, 0, "Debug char permutation process",
this->params()),
double_MEMBER(bestrate_pruning_factor, 2.0,
"Multiplying factor of"
" current best rate to prune other hypotheses",
this->params()),
BOOL_MEMBER(permute_script_word, 0,
"Turn on word script consistency permuter", this->params()),
BOOL_MEMBER(segment_segcost_rating, 0,
"incorporate segmentation cost in word rating?",
this->params()),
double_MEMBER(segment_reward_script, 0.95,
"Score multipler for script consistency within a word. "
"Being a 'reward' factor, it should be <= 1. "
"Smaller value implies bigger reward.",
this->params()),
BOOL_MEMBER(permute_fixed_length_dawg, 0,
"Turn on fixed-length phrasebook search permuter",
this->params()),
BOOL_MEMBER(permute_chartype_word, 0,
"Turn on character type (property) consistency permuter",
this->params()),
double_MEMBER(segment_reward_chartype, 0.97,
"Score multipler for char type consistency within a word. ",
this->params()),
double_MEMBER(segment_reward_ngram_best_choice, 0.99,
"Score multipler for ngram permuter's best choice"
" (only used in the Han script path).",
this->params()),
BOOL_MEMBER(ngram_permuter_activated, false,
"Activate character-level n-gram-based permuter",
this->params()),
BOOL_MEMBER(permute_only_top, false, "Run only the top choice permuter",
this->params()),
INT_MEMBER(language_model_fixed_length_choices_depth, 3,
"Depth of blob choice lists to explore"
" when fixed length dawgs are on",
this->params()),
BOOL_MEMBER(use_new_state_cost, FALSE,
"use new state cost heuristics for segmentation state"
" evaluation",
this->params()),
double_MEMBER(heuristic_segcost_rating_base, 1.25,
"base factor for adding segmentation cost into word rating."
"It's a multiplying factor, the larger the value above 1, "
"the bigger the effect of segmentation cost.",
this->params()),
double_MEMBER(heuristic_weight_rating, 1.0,
"weight associated with char rating in combined cost of"
"state",
this->params()),
double_MEMBER(heuristic_weight_width, 1000.0,
"weight associated with width evidence in combined cost of"
" state",
this->params()),
double_MEMBER(heuristic_weight_seamcut, 0.0,
"weight associated with seam cut in combined cost of state",
this->params()),
double_MEMBER(heuristic_max_char_wh_ratio, 2.0,
"max char width-to-height ratio allowed in segmentation",
this->params()),
BOOL_MEMBER(enable_new_segsearch, true,
"Enable new segmentation search path.", this->params()),
double_MEMBER(segsearch_max_fixed_pitch_char_wh_ratio, 2.0,
"Maximum character width-to-height ratio for"
" fixed-pitch fonts",
this->params()),
// END DEPRECATED PARAMETERS
backup_config_file_(NULL),
pix_binary_(NULL),
cube_binary_(NULL),
pix_grey_(NULL),
pix_original_(NULL),
pix_thresholds_(NULL),
source_resolution_(0),
textord_(this),
right_to_left_(false),
scaled_color_(NULL),
scaled_factor_(-1),
deskew_(1.0f, 0.0f),
reskew_(1.0f, 0.0f),
most_recently_used_(this),
font_table_size_(0),
#ifndef NO_CUBE_BUILD
cube_cntxt_(NULL),
tess_cube_combiner_(NULL),
#endif
equ_detect_(NULL) {
}
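// All of the BOOL_/INT_/STRING_/double_MEMBER entries above register run-time
// parameters with this->params(), so they can be changed without recompiling.
// A minimal usage sketch (the image and config file names here are
// hypothetical, and -c assumes a tesseract build that supports that flag):
//
//   # contents of a config file "myconfig": one "name value" pair per line
//   tessedit_create_hocr      1
//   preserve_interword_spaces 1
//
//   $ tesseract page.tif page myconfig
//   $ tesseract page.tif page -c tessedit_create_hocr=1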
Tesseract::~Tesseract() {
Clear();
pixDestroy(&pix_original_);
end_tesseract();
sub_langs_.delete_data_pointers();
#ifndef NO_CUBE_BUILD
// Delete cube objects.
if (cube_cntxt_ != NULL) {
delete cube_cntxt_;
cube_cntxt_ = NULL;
}
if (tess_cube_combiner_ != NULL) {
delete tess_cube_combiner_;
tess_cube_combiner_ = NULL;
}
#endif
}
void Tesseract::Clear() {
pixDestroy(&pix_binary_);
pixDestroy(&cube_binary_);
pixDestroy(&pix_grey_);
pixDestroy(&pix_thresholds_);
pixDestroy(&scaled_color_);
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
splitter_.Clear();
scaled_factor_ = -1;
for (int i = 0; i < sub_langs_.size(); ++i)
sub_langs_[i]->Clear();
}
void Tesseract::SetEquationDetect(EquationDetect* detector) {
equ_detect_ = detector;
equ_detect_->SetLangTesseract(this);
}
// Clear all memory of adaption for this and all subclassifiers.
void Tesseract::ResetAdaptiveClassifier() {
ResetAdaptiveClassifierInternal();
for (int i = 0; i < sub_langs_.size(); ++i) {
sub_langs_[i]->ResetAdaptiveClassifierInternal();
}
}
// Clear the document dictionary for this and all subclassifiers.
void Tesseract::ResetDocumentDictionary() {
getDict().ResetDocumentDictionary();
for (int i = 0; i < sub_langs_.size(); ++i) {
sub_langs_[i]->getDict().ResetDocumentDictionary();
}
}
void Tesseract::SetBlackAndWhitelist() {
// Set the white and blacklists (if any)
unicharset.set_black_and_whitelist(tessedit_char_blacklist.string(),
tessedit_char_whitelist.string(),
tessedit_char_unblacklist.string());
// Black and white lists should apply to all loaded classifiers.
for (int i = 0; i < sub_langs_.size(); ++i) {
sub_langs_[i]->unicharset.set_black_and_whitelist(
tessedit_char_blacklist.string(), tessedit_char_whitelist.string(),
tessedit_char_unblacklist.string());
}
}
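// A minimal usage sketch for the lists above, assuming the standard
// TessBaseAPI wrapper and an installed "eng" traineddata (illustrative only):
//
//   tesseract::TessBaseAPI api;
//   api.Init(NULL, "eng");
//   api.SetVariable("tessedit_char_whitelist", "0123456789");  // digits only
//   api.SetVariable("tessedit_char_blacklist", "");            // nothing banned
//   // SetBlackAndWhitelist() then pushes both lists into the unicharset of
//   // the main language and of every loaded sub-language before recognition.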
// Perform steps to prepare underlying binary image/other data structures for
// page segmentation.
void Tesseract::PrepareForPageseg() {
textord_.set_use_cjk_fp_model(textord_use_cjk_fp_model);
pixDestroy(&cube_binary_);
cube_binary_ = pixClone(pix_binary());
// Find the max splitter strategy over all langs.
ShiroRekhaSplitter::SplitStrategy max_pageseg_strategy =
static_cast<ShiroRekhaSplitter::SplitStrategy>(
static_cast<inT32>(pageseg_devanagari_split_strategy));
for (int i = 0; i < sub_langs_.size(); ++i) {
ShiroRekhaSplitter::SplitStrategy pageseg_strategy =
static_cast<ShiroRekhaSplitter::SplitStrategy>(
static_cast<inT32>(sub_langs_[i]->pageseg_devanagari_split_strategy));
if (pageseg_strategy > max_pageseg_strategy)
max_pageseg_strategy = pageseg_strategy;
// Clone the cube image to all the sub langs too.
pixDestroy(&sub_langs_[i]->cube_binary_);
sub_langs_[i]->cube_binary_ = pixClone(pix_binary());
pixDestroy(&sub_langs_[i]->pix_binary_);
sub_langs_[i]->pix_binary_ = pixClone(pix_binary());
}
// Perform shiro-rekha (top-line) splitting and replace the current image by
// the newly splitted image.
splitter_.set_orig_pix(pix_binary());
splitter_.set_pageseg_split_strategy(max_pageseg_strategy);
if (splitter_.Split(true)) {
ASSERT_HOST(splitter_.splitted_image());
pixDestroy(&pix_binary_);
pix_binary_ = pixClone(splitter_.splitted_image());
}
}
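// Note on the splitter above: with the default NO_SPLIT strategy in every
// loaded language, Split(true) performs no top-line splitting and pix_binary_
// keeps the original binarized image. A hedged sketch of enabling it through
// the generic variable mechanism (the numeric value's meaning follows the
// ShiroRekhaSplitter::SplitStrategy enum):
//
//   api.SetVariable("pageseg_devanagari_split_strategy", "1");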
// Perform steps to prepare underlying binary image/other data structures for
// OCR. The current segmentation is required by this method.
// Note that this method resets pix_binary_ to the original binarized image,
// which may be different from the image actually used for OCR depending on the
// value of devanagari_ocr_split_strategy.
void Tesseract::PrepareForTessOCR(BLOCK_LIST* block_list,
Tesseract* osd_tess, OSResults* osr) {
// Find the max splitter strategy over all langs.
ShiroRekhaSplitter::SplitStrategy max_ocr_strategy =
static_cast<ShiroRekhaSplitter::SplitStrategy>(
static_cast<inT32>(ocr_devanagari_split_strategy));
for (int i = 0; i < sub_langs_.size(); ++i) {
ShiroRekhaSplitter::SplitStrategy ocr_strategy =
static_cast<ShiroRekhaSplitter::SplitStrategy>(
static_cast<inT32>(sub_langs_[i]->ocr_devanagari_split_strategy));
if (ocr_strategy > max_ocr_strategy)
max_ocr_strategy = ocr_strategy;
}
// Utilize the segmentation information available.
splitter_.set_segmentation_block_list(block_list);
splitter_.set_ocr_split_strategy(max_ocr_strategy);
// Run the splitter for OCR
bool split_for_ocr = splitter_.Split(false);
// Restore pix_binary to the binarized original pix for future reference.
ASSERT_HOST(splitter_.orig_pix());
pixDestroy(&pix_binary_);
pix_binary_ = pixClone(splitter_.orig_pix());
// If the pageseg and ocr strategies are different, refresh the block list
// (from the last SegmentImage call) with blobs from the real image to be used
// for OCR.
if (splitter_.HasDifferentSplitStrategies()) {
BLOCK block("", TRUE, 0, 0, 0, 0, pixGetWidth(pix_binary_),
pixGetHeight(pix_binary_));
Pix* pix_for_ocr = split_for_ocr ? splitter_.splitted_image() :
splitter_.orig_pix();
extract_edges(pix_for_ocr, &block);
splitter_.RefreshSegmentationWithNewBlobs(block.blob_list());
}
// The splitter isn't needed any more after this, so save memory by clearing.
splitter_.Clear();
}
} // namespace tesseract

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,24 @@
/**********************************************************************
* File: tessvars.cpp (Formerly tessvars.c)
* Description: Variables and other globals for tessedit.
* Author: Ray Smith
* Created: Mon Apr 13 13:13:23 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <stdio.h>
#include "tessvars.h"
FILE *debug_fp = stderr; // write debug stuff here

View File

@ -0,0 +1,27 @@
/**********************************************************************
* File: tessvars.h (Formerly tessvars.h)
* Description: Variables and other globals for tessedit.
* Author: Ray Smith
* Created: Mon Apr 13 13:13:23 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSVARS_H
#define TESSVARS_H
#include <stdio.h>
extern FILE *debug_fp; // write debug stuff here
#endif

View File

@ -0,0 +1,330 @@
/**********************************************************************
* File: tfacepp.cpp (Formerly tface++.c)
* Description: C++ side of the C/C++ Tess/Editor interface.
* Author: Ray Smith
* Created: Thu Apr 23 15:39:23 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifdef _MSC_VER
#pragma warning(disable:4244) // Conversion warnings
#pragma warning(disable:4305) // int/float warnings
#pragma warning(disable:4800) // int/bool warnings
#endif
#include <math.h>
#include "blamer.h"
#include "errcode.h"
#include "ratngs.h"
#include "reject.h"
#include "tesseractclass.h"
#include "werd.h"
#define MAX_UNDIVIDED_LENGTH 24
/**********************************************************************
* recog_word
*
* Convert the word to tess form and pass it to the tess segmenter.
* Convert the output back to editor form.
**********************************************************************/
namespace tesseract {
void Tesseract::recog_word(WERD_RES *word) {
if (wordrec_skip_no_truth_words && (word->blamer_bundle == NULL ||
word->blamer_bundle->incorrect_result_reason() == IRR_NO_TRUTH)) {
if (classify_debug_level) tprintf("No truth for word - skipping\n");
word->tess_failed = true;
return;
}
ASSERT_HOST(!word->chopped_word->blobs.empty());
recog_word_recursive(word);
word->SetupBoxWord();
if (word->best_choice->length() != word->box_word->length()) {
tprintf("recog_word ASSERT FAIL String:\"%s\"; "
"Strlen=%d; #Blobs=%d\n",
word->best_choice->debug_string().string(),
word->best_choice->length(), word->box_word->length());
}
ASSERT_HOST(word->best_choice->length() == word->box_word->length());
// Check that the ratings matrix size matches the sum of all the
// segmentation states.
if (!word->StatesAllValid()) {
tprintf("Not all words have valid states relative to ratings matrix!!");
word->DebugWordChoices(true, NULL);
ASSERT_HOST(word->StatesAllValid());
}
if (tessedit_override_permuter) {
/* Override the permuter type if a straight dictionary check disagrees. */
uinT8 perm_type = word->best_choice->permuter();
if ((perm_type != SYSTEM_DAWG_PERM) &&
(perm_type != FREQ_DAWG_PERM) && (perm_type != USER_DAWG_PERM)) {
uinT8 real_dict_perm_type = dict_word(*word->best_choice);
if (((real_dict_perm_type == SYSTEM_DAWG_PERM) ||
(real_dict_perm_type == FREQ_DAWG_PERM) ||
(real_dict_perm_type == USER_DAWG_PERM)) &&
(alpha_count(word->best_choice->unichar_string().string(),
word->best_choice->unichar_lengths().string()) > 0)) {
word->best_choice->set_permuter(real_dict_perm_type); // use dict perm
}
}
if (tessedit_rejection_debug &&
perm_type != word->best_choice->permuter()) {
tprintf("Permuter Type Flipped from %d to %d\n",
perm_type, word->best_choice->permuter());
}
}
// Factored out from control.cpp
ASSERT_HOST((word->best_choice == NULL) == (word->raw_choice == NULL));
if (word->best_choice == NULL || word->best_choice->length() == 0 ||
static_cast<int>(strspn(word->best_choice->unichar_string().string(),
" ")) == word->best_choice->length()) {
word->tess_failed = true;
word->reject_map.initialise(word->box_word->length());
word->reject_map.rej_word_tess_failure();
}
else {
word->tess_failed = false;
}
}
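// Worked example of the permuter override above (illustrative): if the best
// choice comes back with a non-dictionary permuter such as NUMBER_PERM, but a
// direct dict_word() lookup classifies the same string as SYSTEM_DAWG_PERM
// and alpha_count() finds at least one letter, the stored permuter is upgraded
// so that later stages can treat the word as a dictionary match.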
/**********************************************************************
* recog_word_recursive
*
* Convert the word to tess form and pass it to the tess segmenter.
* Convert the output back to editor form.
**********************************************************************/
void Tesseract::recog_word_recursive(WERD_RES *word) {
int word_length = word->chopped_word->NumBlobs(); // no of blobs
if (word_length > MAX_UNDIVIDED_LENGTH) {
return split_and_recog_word(word);
}
cc_recog(word);
word_length = word->rebuild_word->NumBlobs(); // No of blobs in output.
// Do sanity checks and minor fixes on best_choice.
if (word->best_choice->length() > word_length) {
word->best_choice->make_bad(); // should never happen
tprintf("recog_word: Discarded long string \"%s\""
" (%d characters vs %d blobs)\n",
word->best_choice->unichar_string().string(),
word->best_choice->length(), word_length);
tprintf("Word is at:");
word->word->bounding_box().print();
}
if (word->best_choice->length() < word_length) {
UNICHAR_ID space_id = unicharset.unichar_to_id(" ");
while (word->best_choice->length() < word_length) {
word->best_choice->append_unichar_id(space_id, 1, 0.0,
word->best_choice->certainty());
}
}
}
/**********************************************************************
* split_and_recog_word
*
* Split the word into 2 smaller pieces at the largest gap.
* Recognize the pieces and stick the results back together.
**********************************************************************/
void Tesseract::split_and_recog_word(WERD_RES *word) {
// Find the biggest blob gap in the chopped_word.
int bestgap = -MAX_INT32;
int split_index = 0;
for (int b = 1; b < word->chopped_word->NumBlobs(); ++b) {
TBOX prev_box = word->chopped_word->blobs[b - 1]->bounding_box();
TBOX blob_box = word->chopped_word->blobs[b]->bounding_box();
int gap = blob_box.left() - prev_box.right();
if (gap > bestgap) {
bestgap = gap;
split_index = b;
}
}
ASSERT_HOST(split_index > 0);
WERD_RES *word2 = NULL;
BlamerBundle *orig_bb = NULL;
split_word(word, split_index, &word2, &orig_bb);
// Recognize the first part of the word.
recog_word_recursive(word);
// Recognize the second part of the word.
recog_word_recursive(word2);
join_words(word, word2, orig_bb);
}
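// Worked example of the gap search above (numbers are illustrative): with
// blob bounding boxes spanning x = [10..30], [34..50] and [70..90], the
// candidate gaps are 34 - 30 = 4 and 70 - 50 = 20, so split_index becomes 2
// and the word is divided between the second and third blobs before each
// half is recognized and re-joined.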
/**********************************************************************
* split_word
*
* Split a given WERD_RES in place into two smaller words for recognition.
* split_pt is the index of the first blob to go in the second word.
* The underlying word is left alone, only the TWERD (and subsequent data)
* are split up. orig_blamer_bundle is set to the original blamer bundle,
* and will now be owned by the caller. New blamer bundles are forged for the
* two pieces.
**********************************************************************/
void Tesseract::split_word(WERD_RES *word,
int split_pt,
WERD_RES **right_piece,
BlamerBundle **orig_blamer_bundle) const {
ASSERT_HOST(split_pt > 0 && split_pt < word->chopped_word->NumBlobs());
// Save a copy of the blamer bundle so we can try to reconstruct it below.
BlamerBundle *orig_bb =
word->blamer_bundle ? new BlamerBundle(*word->blamer_bundle) : NULL;
WERD_RES *word2 = new WERD_RES(*word);
// blow away the copied chopped_word, as we want to work with
// the blobs from the input chopped_word so seam_arrays can be merged.
TWERD *chopped = word->chopped_word;
TWERD *chopped2 = new TWERD;
chopped2->blobs.reserve(chopped->NumBlobs() - split_pt);
for (int i = split_pt; i < chopped->NumBlobs(); ++i) {
chopped2->blobs.push_back(chopped->blobs[i]);
}
chopped->blobs.truncate(split_pt);
word->chopped_word = NULL;
delete word2->chopped_word;
word2->chopped_word = NULL;
const UNICHARSET &unicharset = *word->uch_set;
word->ClearResults();
word2->ClearResults();
word->chopped_word = chopped;
word2->chopped_word = chopped2;
word->SetupBasicsFromChoppedWord(unicharset);
word2->SetupBasicsFromChoppedWord(unicharset);
// Try to adjust the blamer bundle.
if (orig_bb != NULL) {
// TODO(rays) Looks like a leak to me.
// orig_bb should take, rather than copy.
word->blamer_bundle = new BlamerBundle();
word2->blamer_bundle = new BlamerBundle();
orig_bb->SplitBundle(chopped->blobs.back()->bounding_box().right(),
word2->chopped_word->blobs[0]->bounding_box().left(),
wordrec_debug_blamer,
word->blamer_bundle, word2->blamer_bundle);
}
*right_piece = word2;
*orig_blamer_bundle = orig_bb;
}
/**********************************************************************
* join_words
*
* The opposite of split_word():
* join word2 (including any recognized data / seam array / etc)
* onto the right of word and then delete word2.
* Also, if orig_bb is provided, stitch it back into word.
**********************************************************************/
void Tesseract::join_words(WERD_RES *word,
WERD_RES *word2,
BlamerBundle *orig_bb) const {
TBOX prev_box = word->chopped_word->blobs.back()->bounding_box();
TBOX blob_box = word2->chopped_word->blobs[0]->bounding_box();
// Tack the word2 outputs onto the end of the word outputs.
word->chopped_word->blobs += word2->chopped_word->blobs;
word->rebuild_word->blobs += word2->rebuild_word->blobs;
word2->chopped_word->blobs.clear();
word2->rebuild_word->blobs.clear();
TPOINT split_pt;
split_pt.x = (prev_box.right() + blob_box.left()) / 2;
split_pt.y = (prev_box.top() + prev_box.bottom() +
blob_box.top() + blob_box.bottom()) / 4;
// Move the word2 seams onto the end of the word1 seam_array.
// Since the seam list is one element short, an empty seam marking the
// end of the last blob in the first word is needed first.
word->seam_array.push_back(new SEAM(0.0f, split_pt));
word->seam_array += word2->seam_array;
word2->seam_array.truncate(0);
// Fix widths and gaps.
word->blob_widths += word2->blob_widths;
word->blob_gaps += word2->blob_gaps;
// Fix the ratings matrix.
int rat1 = word->ratings->dimension();
int rat2 = word2->ratings->dimension();
word->ratings->AttachOnCorner(word2->ratings);
ASSERT_HOST(word->ratings->dimension() == rat1 + rat2);
word->best_state += word2->best_state;
// Append the word choices.
*word->raw_choice += *word2->raw_choice;
// How many alt choices from each should we try to get?
const int kAltsPerPiece = 2;
// When do we start throwing away extra alt choices?
const int kTooManyAltChoices = 100;
// Construct the cartesian product of the best_choices of word(1) and word2.
WERD_CHOICE_LIST joined_choices;
WERD_CHOICE_IT jc_it(&joined_choices);
WERD_CHOICE_IT bc1_it(&word->best_choices);
WERD_CHOICE_IT bc2_it(&word2->best_choices);
int num_word1_choices = word->best_choices.length();
int total_joined_choices = num_word1_choices;
// Nota Bene: For the main loop here, we operate only on the 2nd and greater
// word2 choices, and put them in the joined_choices list. The 1st word2
// choice gets added to the original word1 choices in-place after we have
// finished with them.
int bc2_index = 1;
for (bc2_it.forward(); !bc2_it.at_first(); bc2_it.forward(), ++bc2_index) {
if (total_joined_choices >= kTooManyAltChoices &&
bc2_index > kAltsPerPiece)
break;
int bc1_index = 0;
for (bc1_it.move_to_first(); bc1_index < num_word1_choices;
++bc1_index, bc1_it.forward()) {
if (total_joined_choices >= kTooManyAltChoices &&
bc1_index > kAltsPerPiece)
break;
WERD_CHOICE *wc = new WERD_CHOICE(*bc1_it.data());
*wc += *bc2_it.data();
jc_it.add_after_then_move(wc);
++total_joined_choices;
}
}
// Now that we've filled in as many alternates as we want, paste the best
// choice for word2 onto the original word alt_choices.
bc1_it.move_to_first();
bc2_it.move_to_first();
for (bc1_it.mark_cycle_pt(); !bc1_it.cycled_list(); bc1_it.forward()) {
*bc1_it.data() += *bc2_it.data();
}
bc1_it.move_to_last();
bc1_it.add_list_after(&joined_choices);
// Restore the pointer to original blamer bundle and combine blamer
// information recorded in the splits.
if (orig_bb != NULL) {
orig_bb->JoinBlames(*word->blamer_bundle, *word2->blamer_bundle,
wordrec_debug_blamer);
delete word->blamer_bundle;
word->blamer_bundle = orig_bb;
}
word->SetupBoxWord();
word->reject_map.initialise(word->box_word->length());
delete word2;
}
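// Worked example of the choice merging above (illustrative): if word has
// best_choices {A1, A2, A3} and word2 has {B1, B2}, the loop first builds the
// joined alternates A1+B2, A2+B2, A3+B2 (word2's non-best choices only), then
// appends B1 in place to A1, A2 and A3, and finally splices the joined list
// onto the end, giving {A1+B1, A2+B1, A3+B1, A1+B2, A2+B2, A3+B2}. The
// kTooManyAltChoices / kAltsPerPiece limits only start trimming once 100
// combinations have accumulated.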
} // namespace tesseract

View File

@ -0,0 +1,334 @@
///////////////////////////////////////////////////////////////////////
// File: thresholder.cpp
// Description: Base API for thresholding images in tesseract.
// Author: Ray Smith
// Created: Mon May 12 11:28:15 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "allheaders.h"
#include "thresholder.h"
#include <string.h>
#include "otsuthr.h"
#include "openclwrapper.h"
namespace tesseract {
ImageThresholder::ImageThresholder()
: pix_(NULL),
image_width_(0), image_height_(0),
pix_channels_(0), pix_wpl_(0),
scale_(1), yres_(300), estimated_res_(300) {
SetRectangle(0, 0, 0, 0);
}
ImageThresholder::~ImageThresholder() {
Clear();
}
// Destroy the Pix if there is one, freeing memory.
void ImageThresholder::Clear() {
pixDestroy(&pix_);
}
// Return true if no image has been set.
bool ImageThresholder::IsEmpty() const {
return pix_ == NULL;
}
// SetImage makes a copy of all the image data, so it may be deleted
// immediately after this call.
// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
// Palette color images will not work properly and must be converted to
// 24 bit.
// Binary images of 1 bit per pixel may also be given but they must be
// byte packed with the MSB of the first byte being the first pixel, and a
// pixel value of one is WHITE. For binary images set bytes_per_pixel=0.
void ImageThresholder::SetImage(const unsigned char* imagedata,
int width, int height,
int bytes_per_pixel, int bytes_per_line) {
int bpp = bytes_per_pixel * 8;
if (bpp == 0) bpp = 1;
Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
l_uint32* data = pixGetData(pix);
int wpl = pixGetWpl(pix);
switch (bpp) {
case 1:
for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
for (int x = 0; x < width; ++x) {
if (imagedata[x / 8] & (0x80 >> (x % 8)))
CLEAR_DATA_BIT(data, x);
else
SET_DATA_BIT(data, x);
}
}
break;
case 8:
// Greyscale just copies the bytes in the right order.
for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
for (int x = 0; x < width; ++x)
SET_DATA_BYTE(data, x, imagedata[x]);
}
break;
case 24:
// Put the colors in the correct places in the line buffer.
for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
for (int x = 0; x < width; ++x, ++data) {
SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
}
}
break;
case 32:
// Maintain byte order consistency across different endianness.
for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
for (int x = 0; x < width; ++x) {
data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
(imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
}
}
break;
default:
tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
}
pixSetYRes(pix, 300);
SetImage(pix);
pixDestroy(&pix);
}
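// A minimal usage sketch for the raw-buffer path above (the grey buffer and
// its dimensions are hypothetical):
//
//   ImageThresholder thresholder;
//   // 'grey' is a caller-owned width*height buffer, one byte per pixel.
//   thresholder.SetImage(grey, width, height,
//                        /*bytes_per_pixel=*/1, /*bytes_per_line=*/width);
//   thresholder.SetSourceYResolution(300);   // pixels per inch, if known
//   Pix* binary = NULL;
//   thresholder.ThresholdToPix(PSM_AUTO, &binary);
//   // ... use 'binary', then pixDestroy(&binary);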
// Store the coordinates of the rectangle to process for later use.
// Doesn't actually do any thresholding.
void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
rect_left_ = left;
rect_top_ = top;
rect_width_ = width;
rect_height_ = height;
}
// Get enough parameters to be able to rebuild bounding boxes in the
// original image (not just within the rectangle).
// Left and top are enough with top-down coordinates, but
// the height of the rectangle and the image are needed for bottom-up.
void ImageThresholder::GetImageSizes(int* left, int* top,
int* width, int* height,
int* imagewidth, int* imageheight) {
*left = rect_left_;
*top = rect_top_;
*width = rect_width_;
*height = rect_height_;
*imagewidth = image_width_;
*imageheight = image_height_;
}
// Pix vs raw, which to use? Pix is the preferred input for efficiency,
// since raw buffers are copied.
// SetImage for Pix clones its input, so the source pix may be pixDestroyed
// immediately after, but may not go away until after the Thresholder has
// finished with it.
void ImageThresholder::SetImage(const Pix* pix) {
if (pix_ != NULL)
pixDestroy(&pix_);
Pix* src = const_cast<Pix*>(pix);
int depth;
pixGetDimensions(src, &image_width_, &image_height_, &depth);
// Convert the image as necessary so it is one of binary, plain RGB, or
// 8 bit with no colormap. Guarantee that we always end up with our own copy,
// not just a clone of the input.
if (pixGetColormap(src)) {
Pix* tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
depth = pixGetDepth(tmp);
if (depth > 1 && depth < 8) {
pix_ = pixConvertTo8(tmp, false);
pixDestroy(&tmp);
}
else {
pix_ = tmp;
}
}
else if (depth > 1 && depth < 8) {
pix_ = pixConvertTo8(src, false);
}
else {
pix_ = pixCopy(NULL, src);
}
depth = pixGetDepth(pix_);
pix_channels_ = depth / 8;
pix_wpl_ = pixGetWpl(pix_);
scale_ = 1;
estimated_res_ = yres_ = pixGetYRes(pix_);
Init();
}
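// Net effect of the conversions above: 1 bpp input stays binary
// (pix_channels_ == 0), 2/4 bpp input is promoted to 8 bpp grey
// (pix_channels_ == 1), colormapped input has its colormap removed (ending up
// as grey or RGB depending on the map), and 32 bpp RGB(A) is copied as-is
// (pix_channels_ == 4). In every case pix_ is a private copy rather than a
// clone of the caller's Pix.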
// Threshold the source image as efficiently as possible to the output Pix.
// Creates a Pix and sets pix to point to the resulting pointer.
// Caller must use pixDestroy to free the created Pix.
void ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
if (pix_channels_ == 0) {
// We have a binary image, but it still has to be copied, as this API
// allows the caller to modify the output.
Pix* original = GetPixRect();
*pix = pixCopy(NULL, original);
pixDestroy(&original);
}
else {
OtsuThresholdRectToPix(pix_, pix);
}
}
// Gets a pix that contains an 8 bit threshold value at each pixel. The
// returned pix may be an integer reduction of the binary image such that
// the scale factor may be inferred from the ratio of the sizes, even down
// to the extreme of a 1x1 pixel thresholds image.
// Ideally the 8 bit threshold should be the exact threshold used to generate
// the binary image in ThresholdToPix, but this is not a hard constraint.
// Returns NULL if the input is binary. PixDestroy after use.
Pix* ImageThresholder::GetPixRectThresholds() {
if (IsBinary()) return NULL;
Pix* pix_grey = GetPixRectGrey();
int width = pixGetWidth(pix_grey);
int height = pixGetHeight(pix_grey);
int* thresholds;
int* hi_values;
OtsuThreshold(pix_grey, 0, 0, width, height, &thresholds, &hi_values);
pixDestroy(&pix_grey);
Pix* pix_thresholds = pixCreate(width, height, 8);
int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
pixSetAllArbitrary(pix_thresholds, threshold);
delete[] thresholds;
delete[] hi_values;
return pix_thresholds;
}
// Common initialization shared between SetImage methods.
void ImageThresholder::Init() {
SetRectangle(0, 0, image_width_, image_height_);
}
// Get a clone/copy of the source image rectangle.
// The returned Pix must be pixDestroyed.
// This function will be used in the future by the page layout analysis, and
// the layout analysis that uses it will only be available with Leptonica,
// so there is no raw equivalent.
Pix* ImageThresholder::GetPixRect() {
if (IsFullImage()) {
// Just clone the whole thing.
return pixClone(pix_);
}
else {
// Crop to the given rectangle.
Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
Pix* cropped = pixClipRectangle(pix_, box, NULL);
boxDestroy(&box);
return cropped;
}
}
// Get a clone/copy of the source image rectangle, reduced to greyscale,
// and at the same resolution as the output binary.
// The returned Pix must be pixDestroyed.
// Provided to the classifier to extract features from the greyscale image.
Pix* ImageThresholder::GetPixRectGrey() {
Pix* pix = GetPixRect(); // May have to be reduced to grey.
int depth = pixGetDepth(pix);
if (depth != 8) {
Pix* result = depth < 8 ? pixConvertTo8(pix, false)
: pixConvertRGBToLuminance(pix);
pixDestroy(&pix);
return result;
}
return pix;
}
// Otsu thresholds the rectangle, taking the rectangle from *this.
void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix,
Pix** out_pix) const {
PERF_COUNT_START("OtsuThresholdRectToPix")
int* thresholds;
int* hi_values;
int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_,
rect_height_, &thresholds, &hi_values);
// only use opencl if compiled w/ OpenCL and selected device is opencl
#ifdef USE_OPENCL
OpenclDevice od;
if ((num_channels == 4 || num_channels == 1) &&
od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0) {
od.ThresholdRectToPixOCL((unsigned char*)pixGetData(src_pix), num_channels,
pixGetWpl(src_pix) * 4, thresholds, hi_values,
out_pix /*pix_OCL*/, rect_height_, rect_width_,
rect_top_, rect_left_);
}
else {
#endif
ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
#ifdef USE_OPENCL
}
#endif
delete[] thresholds;
delete[] hi_values;
PERF_COUNT_END
}
/// Threshold the rectangle, taking everything except the src_pix
/// from the class, using thresholds/hi_values to the output pix.
/// NOTE that num_channels is the size of the thresholds and hi_values
/// arrays and also the bytes per pixel in src_pix.
void ImageThresholder::ThresholdRectToPix(Pix* src_pix,
int num_channels,
const int* thresholds,
const int* hi_values,
Pix** pix) const {
PERF_COUNT_START("ThresholdRectToPix")
*pix = pixCreate(rect_width_, rect_height_, 1);
uinT32* pixdata = pixGetData(*pix);
int wpl = pixGetWpl(*pix);
int src_wpl = pixGetWpl(src_pix);
uinT32* srcdata = pixGetData(src_pix);
for (int y = 0; y < rect_height_; ++y) {
const uinT32* linedata = srcdata + (y + rect_top_) * src_wpl;
uinT32* pixline = pixdata + y * wpl;
for (int x = 0; x < rect_width_; ++x) {
bool white_result = true;
for (int ch = 0; ch < num_channels; ++ch) {
int pixel = GET_DATA_BYTE(const_cast<void*>(
reinterpret_cast<const void *>(linedata)),
(x + rect_left_) * num_channels + ch);
if (hi_values[ch] >= 0 &&
(pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
white_result = false;
break;
}
}
if (white_result)
CLEAR_DATA_BIT(pixline, x);
else
SET_DATA_BIT(pixline, x);
}
}
PERF_COUNT_END
}
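// Worked example of the per-pixel test above (values are illustrative): for an
// 8-bit grey rectangle OtsuThreshold typically returns num_channels == 1 with,
// say, thresholds[0] == 128 and hi_values[0] == 1. A pixel of 200 satisfies
// pixel > threshold, which differs from (hi_values[0] == 0), so white_result
// stays true and the output bit is cleared (white background). A pixel of 60
// fails pixel > threshold, which now equals (hi_values[0] == 0), so
// white_result becomes false and the bit is set (black foreground).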
} // namespace tesseract.

View File

@ -0,0 +1,189 @@
///////////////////////////////////////////////////////////////////////
// File: thresholder.h
// Description: Base API for thresholding images in tesseract.
// Author: Ray Smith
// Created: Mon May 12 11:00:15 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCMAIN_THRESHOLDER_H__
#define TESSERACT_CCMAIN_THRESHOLDER_H__
#include "platform.h"
#include "publictypes.h"
struct Pix;
namespace tesseract {
/// Base class for all tesseract image thresholding classes.
/// Specific classes can add new thresholding methods by
/// overriding ThresholdToPix.
/// Each instance deals with a single image, but the design is intended to
/// be useful for multiple calls to SetRectangle and ThresholdTo* if
/// desired.
class TESS_API ImageThresholder {
public:
ImageThresholder();
virtual ~ImageThresholder();
/// Destroy the Pix if there is one, freeing memory.
virtual void Clear();
/// Return true if no image has been set.
bool IsEmpty() const;
/// SetImage makes a copy of all the image data, so it may be deleted
/// immediately after this call.
/// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
/// Palette color images will not work properly and must be converted to
/// 24 bit.
/// Binary images of 1 bit per pixel may also be given but they must be
/// byte packed with the MSB of the first byte being the first pixel, and a
/// pixel value of one is WHITE. For binary images set bytes_per_pixel=0.
void SetImage(const unsigned char* imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
/// Store the coordinates of the rectangle to process for later use.
/// Doesn't actually do any thresholding.
void SetRectangle(int left, int top, int width, int height);
/// Get enough parameters to be able to rebuild bounding boxes in the
/// original image (not just within the rectangle).
/// Left and top are enough with top-down coordinates, but
/// the height of the rectangle and the image are needed for bottom-up.
virtual void GetImageSizes(int* left, int* top, int* width, int* height,
int* imagewidth, int* imageheight);
/// Return true if the source image is color.
bool IsColor() const {
return pix_channels_ >= 3;
}
/// Returns true if the source image is binary.
bool IsBinary() const {
return pix_channels_ == 0;
}
int GetScaleFactor() const {
return scale_;
}
// Set the resolution of the source image in pixels per inch.
// This should be called right after SetImage(), and will let us return
// appropriate font sizes for the text.
void SetSourceYResolution(int ppi) {
yres_ = ppi;
estimated_res_ = ppi;
}
int GetSourceYResolution() const {
return yres_;
}
int GetScaledYResolution() const {
return scale_ * yres_;
}
// Set the resolution of the source image in pixels per inch, as estimated
// by the thresholder from the text size found during thresholding.
// This value will be used to set internal size thresholds during recognition
// and will not influence the output "point size." The default value is
// the same as the source resolution (yres_).
void SetEstimatedResolution(int ppi) {
estimated_res_ = ppi;
}
// Returns the estimated resolution, including any active scaling.
// This value will be used to set internal size thresholds during recognition.
int GetScaledEstimatedResolution() const {
return scale_ * estimated_res_;
}
/// Pix vs raw, which to use? Pix is the preferred input for efficiency,
/// since raw buffers are copied.
/// SetImage for Pix clones its input, so the source pix may be pixDestroyed
/// immediately after, but may not go away until after the Thresholder has
/// finished with it.
void SetImage(const Pix* pix);
/// Threshold the source image as efficiently as possible to the output Pix.
/// Creates a Pix and sets pix to point to the resulting pointer.
/// Caller must use pixDestroy to free the created Pix.
virtual void ThresholdToPix(PageSegMode pageseg_mode, Pix** pix);
// Gets a pix that contains an 8 bit threshold value at each pixel. The
// returned pix may be an integer reduction of the binary image such that
// the scale factor may be inferred from the ratio of the sizes, even down
// to the extreme of a 1x1 pixel thresholds image.
// Ideally the 8 bit threshold should be the exact threshold used to generate
// the binary image in ThresholdToPix, but this is not a hard constraint.
// Returns NULL if the input is binary. PixDestroy after use.
virtual Pix* GetPixRectThresholds();
/// Get a clone/copy of the source image rectangle.
/// The returned Pix must be pixDestroyed.
/// This function will be used in the future by the page layout analysis, and
/// the layout analysis that uses it will only be available with Leptonica,
/// so there is no raw equivalent.
Pix* GetPixRect();
// Get a clone/copy of the source image rectangle, reduced to greyscale,
// and at the same resolution as the output binary.
// The returned Pix must be pixDestroyed.
// Provided to the classifier to extract features from the greyscale image.
virtual Pix* GetPixRectGrey();
protected:
// ----------------------------------------------------------------------
// Utility functions that may be useful components for other thresholders.
/// Common initialization shared between SetImage methods.
virtual void Init();
/// Return true if we are processing the full image.
bool IsFullImage() const {
return rect_left_ == 0 && rect_top_ == 0 &&
rect_width_ == image_width_ && rect_height_ == image_height_;
}
// Otsu thresholds the rectangle, taking the rectangle from *this.
void OtsuThresholdRectToPix(Pix* src_pix, Pix** out_pix) const;
/// Threshold the rectangle, taking everything except the src_pix
/// from the class, using thresholds/hi_values to the output pix.
/// NOTE that num_channels is the size of the thresholds and hi_values
/// arrays and also the bytes per pixel in src_pix.
void ThresholdRectToPix(Pix* src_pix, int num_channels,
const int* thresholds, const int* hi_values,
Pix** pix) const;
protected:
/// Clone or other copy of the source Pix.
/// The pix will always be PixDestroy()ed on destruction of the class.
Pix* pix_;
int image_width_; //< Width of source pix_.
int image_height_; //< Height of source pix_.
int pix_channels_; //< Number of 8-bit channels in pix_.
int pix_wpl_; //< Words per line of pix_.
int scale_; //< Scale factor from original image.
int yres_; //< y pixels/inch in source image.
int estimated_res_; //< Resolution estimate from text size.
// Limits of image rectangle to be processed.
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_THRESHOLDER_H__
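// ---------------------------------------------------------------------------
// Illustrative usage sketch (editor's note, not part of the original source).
// The concrete class name tesseract::ImageThresholder and the PSM_AUTO page
// segmentation mode are assumed here; the member functions are the ones
// declared above.
//
//   #include "thresholder.h"
//   #include "allheaders.h"  // Leptonica, for pixRead/pixDestroy
//
//   void ThresholdExample(const char* filename) {
//     Pix* src = pixRead(filename);
//     tesseract::ImageThresholder thresholder;
//     thresholder.SetImage(src);                 // clones the input pix
//     pixDestroy(&src);                          // safe: the clone survives
//     thresholder.SetSourceYResolution(300);     // scan resolution in ppi
//     thresholder.SetRectangle(0, 0, 500, 200);  // process a sub-rectangle only
//     Pix* binary = NULL;
//     thresholder.ThresholdToPix(tesseract::PSM_AUTO, &binary);
//     // ...use the binary image...
//     pixDestroy(&binary);                       // caller owns the result
//   }
// ---------------------------------------------------------------------------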

View File

@ -0,0 +1,2 @@
#define GIT_REV "3.05.00dev"

View File

@ -0,0 +1,59 @@
/**********************************************************************
* File: werdit.cpp (Formerly wordit.c)
* Description: An iterator for passing over all the words in a document.
* Author: Ray Smith
* Created: Mon Apr 27 08:51:22 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "werdit.h"
/**********************************************************************
* make_pseudo_word
*
* Make all the blobs inside a selection into a single word.
* The returned PAGE_RES_IT* it points to the new word. After use, call
* it->DeleteCurrentWord() to delete the fake word, and then
* delete it to get rid of the iterator itself.
**********************************************************************/
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box) {
PAGE_RES_IT pr_it(page_res);
C_BLOB_LIST new_blobs; // list of gathered blobs
C_BLOB_IT new_blob_it = &new_blobs; // iterator
for (WERD_RES* word_res = pr_it.word(); word_res != NULL;
word_res = pr_it.forward()) {
WERD* word = word_res->word;
if (word->bounding_box().overlap(selection_box)) {
C_BLOB_IT blob_it(word->cblob_list());
for (blob_it.mark_cycle_pt();
!blob_it.cycled_list(); blob_it.forward()) {
C_BLOB* blob = blob_it.data();
if (blob->bounding_box().overlap(selection_box)) {
new_blob_it.add_after_then_move(C_BLOB::deep_copy(blob));
}
}
if (!new_blobs.empty()) {
WERD* pseudo_word = new WERD(&new_blobs, 1, NULL);
word_res = pr_it.InsertSimpleCloneWord(*word_res, pseudo_word);
PAGE_RES_IT* it = new PAGE_RES_IT(page_res);
while (it->word() != word_res && it->word() != NULL) it->forward();
ASSERT_HOST(it->word() == word_res);
return it;
}
}
}
return NULL;
}
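// Editor's sketch of the documented call pattern above (page_res and
// selection_box are assumed to exist in the caller):
//
//   PAGE_RES_IT* it = make_pseudo_word(page_res, selection_box);
//   if (it != NULL) {
//     WERD_RES* pseudo = it->word();  // temporary word covering the selection
//     // ...inspect or recognize the pseudo word...
//     it->DeleteCurrentWord();        // remove the fake word from the page results
//     delete it;                      // then free the iterator itself
//   }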

View File

@ -0,0 +1,27 @@
/**********************************************************************
* File: werdit.h
* Description: An iterator for passing over all the words in a document.
* Author: Ray Smith
* Created: Mon Apr 27 08:51:22 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef WERDIT_H
#define WERDIT_H
#include "pageres.h"
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box);
#endif

View File

@ -0,0 +1,603 @@
///////////////////////////////////////////////////////////////////////
// File: blamer.cpp
// Description: Module allowing precise error causes to be allocated.
// Author: Rike Antonova
// Refactored: Ray Smith
// Created: Mon Feb 04 14:37:01 PST 2013
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "blamer.h"
#include "blobs.h"
#include "matrix.h"
#include "normalis.h"
#include "pageres.h"
// Names for each value of IncorrectResultReason enum. Keep in sync.
const char kBlameCorrect[] = "corr";
const char kBlameClassifier[] = "cl";
const char kBlameChopper[] = "chop";
const char kBlameClassLMTradeoff[] = "cl/LM";
const char kBlamePageLayout[] = "pglt";
const char kBlameSegsearchHeur[] = "ss_heur";
const char kBlameSegsearchPP[] = "ss_pp";
const char kBlameClassOldLMTradeoff[] = "cl/old_LM";
const char kBlameAdaption[] = "adapt";
const char kBlameNoTruthSplit[] = "no_tr_spl";
const char kBlameNoTruth[] = "no_tr";
const char kBlameUnknown[] = "unkn";
const char * const kIncorrectResultReasonNames[] = {
kBlameCorrect,
kBlameClassifier,
kBlameChopper,
kBlameClassLMTradeoff,
kBlamePageLayout,
kBlameSegsearchHeur,
kBlameSegsearchPP,
kBlameClassOldLMTradeoff,
kBlameAdaption,
kBlameNoTruthSplit,
kBlameNoTruth,
kBlameUnknown
};
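// For example, kIncorrectResultReasonNames[IRR_CHOPPER] is "chop"; the array
// must stay index-aligned with the IncorrectResultReason enum in blamer.h.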
const char *BlamerBundle::IncorrectReasonName(IncorrectResultReason irr) {
return kIncorrectResultReasonNames[irr];
}
const char *BlamerBundle::IncorrectReason() const {
return kIncorrectResultReasonNames[incorrect_result_reason_];
}
// Functions to setup the blamer.
// Whole word string, whole word bounding box.
void BlamerBundle::SetWordTruth(const UNICHARSET& unicharset,
const char* truth_str, const TBOX& word_box) {
truth_word_.InsertBox(0, word_box);
truth_has_char_boxes_ = false;
// Encode the string as UNICHAR_IDs.
GenericVector<UNICHAR_ID> encoding;
GenericVector<char> lengths;
unicharset.encode_string(truth_str, false, &encoding, &lengths, NULL);
int total_length = 0;
for (int i = 0; i < encoding.size(); total_length += lengths[i++]) {
STRING uch(truth_str + total_length);
uch.truncate_at(lengths[i] - total_length);
UNICHAR_ID id = encoding[i];
if (id != INVALID_UNICHAR_ID) uch = unicharset.get_normed_unichar(id);
truth_text_.push_back(uch);
}
}
// Single "character" string, "character" bounding box.
// May be called multiple times to indicate the characters in a word.
void BlamerBundle::SetSymbolTruth(const UNICHARSET& unicharset,
const char* char_str, const TBOX& char_box) {
STRING symbol_str(char_str);
UNICHAR_ID id = unicharset.unichar_to_id(char_str);
if (id != INVALID_UNICHAR_ID) {
STRING normed_uch(unicharset.get_normed_unichar(id));
if (normed_uch.length() > 0) symbol_str = normed_uch;
}
int length = truth_word_.length();
truth_text_.push_back(symbol_str);
truth_word_.InsertBox(length, char_box);
if (length == 0)
truth_has_char_boxes_ = true;
else if (truth_word_.BlobBox(length - 1) == char_box)
truth_has_char_boxes_ = false;
}
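// Editor's sketch of the two ways a caller can record ground truth (the
// unicharset, strings and boxes are assumed to come from the caller's data):
//
//   // Whole-word truth: one call, no per-character boxes.
//   bundle.SetWordTruth(unicharset, "word", word_box);
//
//   // Per-character truth: one call per symbol in reading order, giving the
//   // blamer per-character boxes for chopper/segmentation blame.
//   bundle.SetSymbolTruth(unicharset, "w", box_w);
//   bundle.SetSymbolTruth(unicharset, "o", box_o);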
// Marks that there is something wrong with the truth text, like it contains
// reject characters.
void BlamerBundle::SetRejectedTruth() {
incorrect_result_reason_ = IRR_NO_TRUTH;
truth_has_char_boxes_ = false;
}
// Returns true if the provided word_choice is correct.
bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE* word_choice) const {
if (word_choice == NULL) return false;
const UNICHARSET* uni_set = word_choice->unicharset();
STRING normed_choice_str;
for (int i = 0; i < word_choice->length(); ++i) {
normed_choice_str +=
uni_set->get_normed_unichar(word_choice->unichar_id(i));
}
STRING truth_str = TruthString();
return truth_str == normed_choice_str;
}
void BlamerBundle::FillDebugString(const STRING &msg,
const WERD_CHOICE *choice,
STRING *debug) {
(*debug) += "Truth ";
for (int i = 0; i < this->truth_text_.length(); ++i) {
(*debug) += this->truth_text_[i];
}
if (!this->truth_has_char_boxes_) (*debug) += " (no char boxes)";
if (choice != NULL) {
(*debug) += " Choice ";
STRING choice_str;
choice->string_and_lengths(&choice_str, NULL);
(*debug) += choice_str;
}
if (msg.length() > 0) {
(*debug) += "\n";
(*debug) += msg;
}
(*debug) += "\n";
}
// Sets up the norm_truth_word from truth_word using the given DENORM.
void BlamerBundle::SetupNormTruthWord(const DENORM& denorm) {
// TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
TPOINT topleft;
TPOINT botright;
TPOINT norm_topleft;
TPOINT norm_botright;
for (int b = 0; b < truth_word_.length(); ++b) {
const TBOX &box = truth_word_.BlobBox(b);
topleft.x = box.left();
topleft.y = box.top();
botright.x = box.right();
botright.y = box.bottom();
denorm.NormTransform(NULL, topleft, &norm_topleft);
denorm.NormTransform(NULL, botright, &norm_botright);
TBOX norm_box(norm_topleft.x, norm_botright.y,
norm_botright.x, norm_topleft.y);
norm_truth_word_.InsertBox(b, norm_box);
}
}
// Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
// bundles) where the right edge of the left-hand word is word1_right,
// and the left edge of the right-hand word is word2_left.
void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug,
BlamerBundle* bundle1,
BlamerBundle* bundle2) const {
STRING debug_str;
// Find truth boxes that correspond to the split in the blobs.
int b;
int begin2_truth_index = -1;
if (incorrect_result_reason_ != IRR_NO_TRUTH &&
truth_has_char_boxes_) {
debug_str = "Looking for truth split at";
debug_str.add_str_int(" end1_x ", word1_right);
debug_str.add_str_int(" begin2_x ", word2_left);
debug_str += "\nnorm_truth_word boxes:\n";
if (norm_truth_word_.length() > 1) {
norm_truth_word_.BlobBox(0).print_to_str(&debug_str);
for (b = 1; b < norm_truth_word_.length(); ++b) {
norm_truth_word_.BlobBox(b).print_to_str(&debug_str);
if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) <
norm_box_tolerance_) &&
(abs(word2_left - norm_truth_word_.BlobBox(b).left()) <
norm_box_tolerance_)) {
begin2_truth_index = b;
debug_str += "Split found";
break;
}
}
debug_str += '\n';
}
}
// Populate truth information in word and word2 with the first and second
// part of the original truth.
if (begin2_truth_index > 0) {
bundle1->truth_has_char_boxes_ = true;
bundle1->norm_box_tolerance_ = norm_box_tolerance_;
bundle2->truth_has_char_boxes_ = true;
bundle2->norm_box_tolerance_ = norm_box_tolerance_;
BlamerBundle *curr_bb = bundle1;
for (b = 0; b < norm_truth_word_.length(); ++b) {
if (b == begin2_truth_index) curr_bb = bundle2;
curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
curr_bb->truth_text_.push_back(truth_text_[b]);
}
}
else if (incorrect_result_reason_ == IRR_NO_TRUTH) {
bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
}
else {
debug_str += "Truth split not found";
debug_str += truth_has_char_boxes_ ?
"\n" : " (no truth char boxes)\n";
bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, NULL, debug);
bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, NULL, debug);
}
}
// "Joins" the blames from bundle1 and bundle2 into *this.
void BlamerBundle::JoinBlames(const BlamerBundle& bundle1,
const BlamerBundle& bundle2, bool debug) {
STRING debug_str;
IncorrectResultReason irr = incorrect_result_reason_;
if (irr != IRR_NO_TRUTH_SPLIT) debug_str = "";
if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
debug_str += "Blame from part 1: ";
debug_str += bundle1.debug_;
irr = bundle1.incorrect_result_reason_;
}
if (bundle2.incorrect_result_reason_ != IRR_CORRECT &&
bundle2.incorrect_result_reason_ != IRR_NO_TRUTH &&
bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
debug_str += "Blame from part 2: ";
debug_str += bundle2.debug_;
if (irr == IRR_CORRECT) {
irr = bundle2.incorrect_result_reason_;
}
else if (irr != bundle2.incorrect_result_reason_) {
irr = IRR_UNKNOWN;
}
}
incorrect_result_reason_ = irr;
if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) {
SetBlame(irr, debug_str, NULL, debug);
}
}
// If a blob with the same bounding box as one of the truth character
// bounding boxes is not classified as the corresponding truth character,
// blames the character classifier for the incorrect answer.
void BlamerBundle::BlameClassifier(const UNICHARSET& unicharset,
const TBOX& blob_box,
const BLOB_CHOICE_LIST& choices,
bool debug) {
if (!truth_has_char_boxes_ ||
incorrect_result_reason_ != IRR_CORRECT)
return; // Nothing to do here.
for (int b = 0; b < norm_truth_word_.length(); ++b) {
const TBOX &truth_box = norm_truth_word_.BlobBox(b);
// Note that we are more strict on the bounding box boundaries here
// than in other places (chopper, segmentation search), since we do
// not have the ability to check the previous and next bounding box.
if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_ / 2)) {
bool found = false;
bool incorrect_adapted = false;
UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID;
const char *truth_str = truth_text_[b].string();
// We promise not to modify the list or its contents, using a
// const BLOB_CHOICE* below.
BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST*>(&choices));
for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
choices_it.forward()) {
const BLOB_CHOICE* choice = choices_it.data();
if (strcmp(truth_str, unicharset.get_normed_unichar(
choice->unichar_id())) == 0) {
found = true;
break;
}
else if (choice->IsAdapted()) {
incorrect_adapted = true;
incorrect_adapted_id = choice->unichar_id();
}
} // end choices_it for loop
if (!found) {
STRING debug_str = "unichar ";
debug_str += truth_str;
debug_str += " not found in classification list";
SetBlame(IRR_CLASSIFIER, debug_str, NULL, debug);
}
else if (incorrect_adapted) {
STRING debug_str = "better rating for adapted ";
debug_str += unicharset.id_to_unichar(incorrect_adapted_id);
debug_str += " than for correct ";
debug_str += truth_str;
SetBlame(IRR_ADAPTION, debug_str, NULL, debug);
}
break;
}
} // end iterating over blamer_bundle->norm_truth_word
}
// Checks whether chops were made at all the character bounding box
// boundaries in word->truth_word. If not, blames the chopper for an
// incorrect answer.
void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) {
if (NoTruth() || !truth_has_char_boxes_ ||
word->chopped_word->blobs.empty()) {
return;
}
STRING debug_str;
bool missing_chop = false;
int num_blobs = word->chopped_word->blobs.size();
int box_index = 0;
int blob_index = 0;
inT16 truth_x = -1;
while (box_index < truth_word_.length() && blob_index < num_blobs) {
truth_x = norm_truth_word_.BlobBox(box_index).right();
TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) {
++blob_index;
continue; // encountered an extra chop, keep looking
}
else if (curr_blob->bounding_box().right() >
truth_x + norm_box_tolerance_) {
missing_chop = true;
break;
}
else { // a chop was found at this truth boundary
++blob_index;
++box_index;
}
}
if (missing_chop || box_index < norm_truth_word_.length()) {
STRING debug_str;
if (missing_chop) {
debug_str.add_str_int("Detected missing chop (tolerance=",
norm_box_tolerance_);
debug_str += ") at Bounding Box=";
TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
curr_blob->bounding_box().print_to_str(&debug_str);
debug_str.add_str_int("\nNo chop for truth at x=", truth_x);
}
else {
debug_str.add_str_int("Missing chops for last ",
norm_truth_word_.length() - box_index);
debug_str += " truth box(es)";
}
debug_str += "\nMaximally chopped word boxes:\n";
for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
curr_blob->bounding_box().print_to_str(&debug_str);
debug_str += '\n';
}
debug_str += "Truth bounding boxes:\n";
for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
norm_truth_word_.BlobBox(box_index).print_to_str(&debug_str);
debug_str += '\n';
}
SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
}
}
// Blames the classifier or the language model if, after running only the
// chopper, best_choice is incorrect and no blame has yet been set.
// Blames the classifier if best_choice is classifier's top choice and is a
// dictionary word (i.e. language model could not have helped).
// Otherwise, blames the language model (formerly permuter word adjustment).
void BlamerBundle::BlameClassifierOrLangModel(
const WERD_RES* word,
const UNICHARSET& unicharset, bool valid_permuter, bool debug) {
if (valid_permuter) {
// Find out whether best choice is a top choice.
best_choice_is_dict_and_top_choice_ = true;
for (int i = 0; i < word->best_choice->length(); ++i) {
BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i));
ASSERT_HOST(!blob_choice_it.empty());
BLOB_CHOICE *first_choice = NULL;
for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
blob_choice_it.forward()) { // find first non-fragment choice
if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) {
first_choice = blob_choice_it.data();
break;
}
}
ASSERT_HOST(first_choice != NULL);
if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) {
best_choice_is_dict_and_top_choice_ = false;
break;
}
}
}
STRING debug_str;
if (best_choice_is_dict_and_top_choice_) {
debug_str = "Best choice is: incorrect, top choice, dictionary word";
debug_str += " with permuter ";
debug_str += word->best_choice->permuter_name();
}
else {
debug_str = "Classifier/Old LM tradeoff is to blame";
}
SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER
: IRR_CLASS_OLD_LM_TRADEOFF,
debug_str, word->best_choice, debug);
}
// Sets up the correct_segmentation_* to mark the correct bounding boxes.
void BlamerBundle::SetupCorrectSegmentation(const TWERD* word, bool debug) {
params_training_bundle_.StartHypothesisList();
if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_)
return; // Nothing to do here.
STRING debug_str;
debug_str += "Blamer computing correct_segmentation_cols\n";
int curr_box_col = 0;
int next_box_col = 0;
int num_blobs = word->NumBlobs();
if (num_blobs == 0) return; // No blobs to play with.
int blob_index = 0;
inT16 next_box_x = word->blobs[blob_index]->bounding_box().right();
for (int truth_idx = 0; blob_index < num_blobs &&
truth_idx < norm_truth_word_.length();
++blob_index) {
++next_box_col;
inT16 curr_box_x = next_box_x;
if (blob_index + 1 < num_blobs)
next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
inT16 truth_x = norm_truth_word_.BlobBox(truth_idx).right();
debug_str.add_str_int("Box x coord vs. truth: ", curr_box_x);
debug_str.add_str_int(" ", truth_x);
debug_str += "\n";
if (curr_box_x > (truth_x + norm_box_tolerance_)) {
break; // failed to find a matching box
}
else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched
(blob_index + 1 >= num_blobs || // next box can't be included
next_box_x > truth_x + norm_box_tolerance_)) {
correct_segmentation_cols_.push_back(curr_box_col);
correct_segmentation_rows_.push_back(next_box_col - 1);
++truth_idx;
debug_str.add_str_int("col=", curr_box_col);
debug_str.add_str_int(" row=", next_box_col - 1);
debug_str += "\n";
curr_box_col = next_box_col;
}
}
if (blob_index < num_blobs || // trailing blobs
correct_segmentation_cols_.length() != norm_truth_word_.length()) {
debug_str.add_str_int("Blamer failed to find correct segmentation"
" (tolerance=", norm_box_tolerance_);
if (blob_index >= num_blobs) debug_str += " blob == NULL";
debug_str += ")\n";
debug_str.add_str_int(" path length ", correct_segmentation_cols_.length());
debug_str.add_str_int(" vs. truth ", norm_truth_word_.length());
debug_str += "\n";
SetBlame(IRR_UNKNOWN, debug_str, NULL, debug);
correct_segmentation_cols_.clear();
correct_segmentation_rows_.clear();
}
}
// Returns true if a guided segmentation search is needed.
bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const {
return incorrect_result_reason_ == IRR_CORRECT &&
!segsearch_is_looking_for_blame_ &&
truth_has_char_boxes_ &&
!ChoiceIsCorrect(best_choice);
}
// Setup ready to guide the segmentation search to the correct segmentation.
// The callback pp_cb is used to avoid a cyclic dependency.
// It calls into LMPainPoints::GenerateForBlamer by pre-binding the
// WERD_RES, and the LMPainPoints itself.
// pp_cb must be a permanent callback, and should be deleted by the caller.
void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice,
MATRIX* ratings, UNICHAR_ID wildcard_id,
bool debug, STRING *debug_str,
TessResultCallback2<bool, int, int>* cb) {
segsearch_is_looking_for_blame_ = true;
if (debug) {
tprintf("segsearch starting to look for blame\n");
}
// Fill pain points for any unclassified blob corresponding to the
// correct segmentation state.
*debug_str += "Correct segmentation:\n";
for (int idx = 0; idx < correct_segmentation_cols_.length(); ++idx) {
debug_str->add_str_int("col=", correct_segmentation_cols_[idx]);
debug_str->add_str_int(" row=", correct_segmentation_rows_[idx]);
*debug_str += "\n";
if (!ratings->Classified(correct_segmentation_cols_[idx],
correct_segmentation_rows_[idx],
wildcard_id) &&
!cb->Run(correct_segmentation_cols_[idx],
correct_segmentation_rows_[idx])) {
segsearch_is_looking_for_blame_ = false;
*debug_str += "\nFailed to insert pain point\n";
SetBlame(IRR_SEGSEARCH_HEUR, *debug_str, best_choice, debug);
break;
}
} // end for blamer_bundle->correct_segmentation_cols/rows
}
// Returns true if the guided segsearch is in progress.
bool BlamerBundle::GuidedSegsearchStillGoing() const {
return segsearch_is_looking_for_blame_;
}
// The segmentation search has ended. Sets the blame appropriately.
void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice,
bool debug, STRING *debug_str) {
// If we are still looking for blame (i.e. best_choice is incorrect, but a
// path representing the correct segmentation could be constructed), we can
// blame segmentation search pain point prioritization if the rating of the
// path corresponding to the correct segmentation is better than that of
// best_choice (i.e. language model would have done the correct thing, but
// because of poor pain point prioritization the correct segmentation was
// never explored). Otherwise we blame the tradeoff between the language model
// and the classifier, since even after exploring the path corresponding to
// the correct segmentation incorrect best_choice would have been chosen.
// One special case when we blame the classifier instead is when best choice
// is incorrect, but it is a dictionary word and the classifier's top choice.
if (segsearch_is_looking_for_blame_) {
segsearch_is_looking_for_blame_ = false;
if (best_choice_is_dict_and_top_choice_) {
*debug_str = "Best choice is: incorrect, top choice, dictionary word";
*debug_str += " with permuter ";
*debug_str += best_choice->permuter_name();
SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug);
}
else if (best_correctly_segmented_rating_ <
best_choice->rating()) {
*debug_str += "Correct segmentation state was not explored";
SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug);
}
else {
if (best_correctly_segmented_rating_ >=
WERD_CHOICE::kBadRating) {
*debug_str += "Correct segmentation paths were pruned by LM\n";
}
else {
debug_str->add_str_double("Best correct segmentation rating ",
best_correctly_segmented_rating_);
debug_str->add_str_double(" vs. best choice rating ",
best_choice->rating());
}
SetBlame(IRR_CLASS_LM_TRADEOFF, *debug_str, best_choice, debug);
}
}
}
// If the bundle is null or still does not indicate the correct result,
// fix it and use some backup reason for the blame.
void BlamerBundle::LastChanceBlame(bool debug, WERD_RES* word) {
if (word->blamer_bundle == NULL) {
word->blamer_bundle = new BlamerBundle();
word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame",
word->best_choice, debug);
}
else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth",
word->best_choice, debug);
}
else {
bool correct = word->blamer_bundle->ChoiceIsCorrect(word->best_choice);
IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_;
if (irr == IRR_CORRECT && !correct) {
STRING debug_str = "Choice is incorrect after recognition";
word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice,
debug);
}
else if (irr != IRR_CORRECT && correct) {
if (debug) {
tprintf("Corrected %s\n", word->blamer_bundle->debug_.string());
}
word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT;
word->blamer_bundle->debug_ = "";
}
}
}
// Sets the misadaption debug if this word is incorrect, as this word is
// being adapted to.
void BlamerBundle::SetMisAdaptionDebug(const WERD_CHOICE *best_choice,
bool debug) {
if (incorrect_result_reason_ != IRR_NO_TRUTH &&
!ChoiceIsCorrect(best_choice)) {
misadaption_debug_ = "misadapt to word (";
misadaption_debug_ += best_choice->permuter_name();
misadaption_debug_ += "): ";
FillDebugString("", best_choice, &misadaption_debug_);
if (debug) {
tprintf("%s\n", misadaption_debug_.string());
}
}
}

View File

@ -0,0 +1,333 @@
///////////////////////////////////////////////////////////////////////
// File: blamer.h
// Description: Module allowing precise error causes to be allocated.
// Author: Rike Antonova
// Refactored: Ray Smith
// Created: Mon Feb 04 14:37:01 PST 2013
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_BLAMER_H_
#define TESSERACT_CCSTRUCT_BLAMER_H_
#include <stdio.h>
#include "boxword.h"
#include "genericvector.h"
#include "matrix.h"
#include "params_training_featdef.h"
#include "ratngs.h"
#include "strngs.h"
#include "tesscallback.h"
static const inT16 kBlamerBoxTolerance = 5;
// Enum for expressing the source of error.
// Note: Please update kIncorrectResultReasonNames when modifying this enum.
enum IncorrectResultReason {
// The text recorded in best choice == truth text
IRR_CORRECT,
// Either: Top choice is incorrect and is a dictionary word (language model
// is unlikely to help correct such errors, so blame the classifier).
// Or: the correct unichar was not included in the shortlist produced by the
// classifier at all.
IRR_CLASSIFIER,
// The chopper has not found one or more splits that correspond to the correct
// character bounding boxes recorded in BlamerBundle::truth_word.
IRR_CHOPPER,
// The classifier did include correct unichars for each blob in the correct
// segmentation, however its rating could have been too bad to allow the
// language model to pull out the correct choice. On the other hand, the
// language model might have been too weak to favor the correct answer;
// we call this case a classifier-language model
// tradeoff error.
IRR_CLASS_LM_TRADEOFF,
// Page layout failed to produce the correct bounding box. Blame page layout
// if the truth was not found for the word, which implies that the bounding
// box of the word was incorrect (no truth word had a similar bounding box).
IRR_PAGE_LAYOUT,
// SegSearch heuristic prevented one or more blobs from the correct
// segmentation state from being classified (e.g. the blob was too wide).
IRR_SEGSEARCH_HEUR,
// The correct segmentation state was not explored because of poor SegSearch
// pain point prioritization. We blame SegSearch pain point prioritization
// if the best rating of a choice constructed from correct segmentation is
// better than that of the best choice (i.e. if we got to explore the correct
// segmentation state, language model would have picked the correct choice).
IRR_SEGSEARCH_PP,
// Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word,
// and thus use the old language model (permuters).
// TODO(antonova): integrate the new language model with chopper
IRR_CLASS_OLD_LM_TRADEOFF,
// If there is an incorrect adaptive template match with a better score than
// a correct one (either pre-trained or adapted), mark this as adaption error.
IRR_ADAPTION,
// split_and_recog_word() failed to find a suitable split in truth.
IRR_NO_TRUTH_SPLIT,
// Truth is not available for this word (e.g. when words in corrected content
// file are turned into ~~~~ because an appropriate alignment was not found).
IRR_NO_TRUTH,
// The text recorded in best choice != truth text, but none of the above
// reasons are set.
IRR_UNKNOWN,
IRR_NUM_REASONS
};
// Blamer-related information to determine the source of errors.
struct BlamerBundle {
static const char *IncorrectReasonName(IncorrectResultReason irr);
BlamerBundle() : truth_has_char_boxes_(false),
incorrect_result_reason_(IRR_CORRECT),
lattice_data_(NULL) {
ClearResults();
}
BlamerBundle(const BlamerBundle &other) {
this->CopyTruth(other);
this->CopyResults(other);
}
~BlamerBundle() { delete[] lattice_data_; }
// Accessors.
STRING TruthString() const {
STRING truth_str;
for (int i = 0; i < truth_text_.length(); ++i)
truth_str += truth_text_[i];
return truth_str;
}
IncorrectResultReason incorrect_result_reason() const {
return incorrect_result_reason_;
}
bool NoTruth() const {
return incorrect_result_reason_ == IRR_NO_TRUTH ||
incorrect_result_reason_ == IRR_PAGE_LAYOUT;
}
bool HasDebugInfo() const {
return debug_.length() > 0 || misadaption_debug_.length() > 0;
}
const STRING& debug() const {
return debug_;
}
const STRING& misadaption_debug() const {
return misadaption_debug_;
}
void UpdateBestRating(float rating) {
if (rating < best_correctly_segmented_rating_)
best_correctly_segmented_rating_ = rating;
}
int correct_segmentation_length() const {
return correct_segmentation_cols_.length();
}
// Returns true if the given ratings matrix col,row position is included
// in the correct segmentation path at the given index.
bool MatrixPositionCorrect(int index, const MATRIX_COORD& coord) {
return correct_segmentation_cols_[index] == coord.col &&
correct_segmentation_rows_[index] == coord.row;
}
void set_best_choice_is_dict_and_top_choice(bool value) {
best_choice_is_dict_and_top_choice_ = value;
}
const char* lattice_data() const {
return lattice_data_;
}
int lattice_size() const {
return lattice_size_; // size of lattice_data in bytes
}
void set_lattice_data(const char* data, int size) {
lattice_size_ = size;
delete[] lattice_data_;
lattice_data_ = new char[lattice_size_];
memcpy(lattice_data_, data, lattice_size_);
}
const tesseract::ParamsTrainingBundle& params_training_bundle() const {
return params_training_bundle_;
}
// Adds a new ParamsTrainingHypothesis to the current hypothesis list.
void AddHypothesis(const tesseract::ParamsTrainingHypothesis& hypo) {
params_training_bundle_.AddHypothesis(hypo);
}
// Functions to setup the blamer.
// Whole word string, whole word bounding box.
void SetWordTruth(const UNICHARSET& unicharset,
const char* truth_str, const TBOX& word_box);
// Single "character" string, "character" bounding box.
// May be called multiple times to indicate the characters in a word.
void SetSymbolTruth(const UNICHARSET& unicharset,
const char* char_str, const TBOX& char_box);
// Marks that there is something wrong with the truth text, like it contains
// reject characters.
void SetRejectedTruth();
// Returns true if the provided word_choice is correct.
bool ChoiceIsCorrect(const WERD_CHOICE* word_choice) const;
void ClearResults() {
norm_truth_word_.DeleteAllBoxes();
norm_box_tolerance_ = 0;
if (!NoTruth()) incorrect_result_reason_ = IRR_CORRECT;
debug_ = "";
segsearch_is_looking_for_blame_ = false;
best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
correct_segmentation_cols_.clear();
correct_segmentation_rows_.clear();
best_choice_is_dict_and_top_choice_ = false;
delete[] lattice_data_;
lattice_data_ = NULL;
lattice_size_ = 0;
}
void CopyTruth(const BlamerBundle &other) {
truth_has_char_boxes_ = other.truth_has_char_boxes_;
truth_word_ = other.truth_word_;
truth_text_ = other.truth_text_;
incorrect_result_reason_ =
(other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT);
}
void CopyResults(const BlamerBundle &other) {
norm_truth_word_ = other.norm_truth_word_;
norm_box_tolerance_ = other.norm_box_tolerance_;
incorrect_result_reason_ = other.incorrect_result_reason_;
segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
correct_segmentation_cols_ = other.correct_segmentation_cols_;
correct_segmentation_rows_ = other.correct_segmentation_rows_;
best_choice_is_dict_and_top_choice_ =
other.best_choice_is_dict_and_top_choice_;
if (other.lattice_data_ != NULL) {
lattice_data_ = new char[other.lattice_size_];
memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);
lattice_size_ = other.lattice_size_;
}
else {
lattice_data_ = NULL;
}
}
const char *IncorrectReason() const;
// Appends choice and truth details to the given debug string.
void FillDebugString(const STRING &msg, const WERD_CHOICE *choice,
STRING *debug);
// Sets up the norm_truth_word from truth_word using the given DENORM.
void SetupNormTruthWord(const DENORM& denorm);
// Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
// bundles) where the right edge of the left-hand word is word1_right,
// and the left edge of the right-hand word is word2_left.
void SplitBundle(int word1_right, int word2_left, bool debug,
BlamerBundle* bundle1, BlamerBundle* bundle2) const;
// "Joins" the blames from bundle1 and bundle2 into *this.
void JoinBlames(const BlamerBundle& bundle1, const BlamerBundle& bundle2,
bool debug);
// If a blob with the same bounding box as one of the truth character
// bounding boxes is not classified as the corresponding truth character,
// blames the character classifier for the incorrect answer.
void BlameClassifier(const UNICHARSET& unicharset,
const TBOX& blob_box,
const BLOB_CHOICE_LIST& choices,
bool debug);
// Checks whether chops were made at all the character bounding box
// boundaries in word->truth_word. If not, blames the chopper for an
// incorrect answer.
void SetChopperBlame(const WERD_RES* word, bool debug);
// Blames the classifier or the language model if, after running only the
// chopper, best_choice is incorrect and no blame has yet been set.
// Blames the classifier if best_choice is classifier's top choice and is a
// dictionary word (i.e. language model could not have helped).
// Otherwise, blames the language model (formerly permuter word adjustment).
void BlameClassifierOrLangModel(
const WERD_RES* word,
const UNICHARSET& unicharset, bool valid_permuter, bool debug);
// Sets up the correct_segmentation_* to mark the correct bounding boxes.
void SetupCorrectSegmentation(const TWERD* word, bool debug);
// Returns true if a guided segmentation search is needed.
bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const;
// Setup ready to guide the segmentation search to the correct segmentation.
// The callback pp_cb is used to avoid a cyclic dependency.
// It calls into LMPainPoints::GenerateForBlamer by pre-binding the
// WERD_RES, and the LMPainPoints itself.
// pp_cb must be a permanent callback, and should be deleted by the caller.
void InitForSegSearch(const WERD_CHOICE *best_choice,
MATRIX* ratings, UNICHAR_ID wildcard_id,
bool debug, STRING *debug_str,
TessResultCallback2<bool, int, int>* pp_cb);
// Returns true if the guided segsearch is in progress.
bool GuidedSegsearchStillGoing() const;
// The segmentation search has ended. Sets the blame appropriately.
void FinishSegSearch(const WERD_CHOICE *best_choice,
bool debug, STRING *debug_str);
// If the bundle is null or still does not indicate the correct result,
// fix it and use some backup reason for the blame.
static void LastChanceBlame(bool debug, WERD_RES* word);
// Sets the misadaption debug if this word is incorrect, as this word is
// being adapted to.
void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug);
private:
void SetBlame(IncorrectResultReason irr, const STRING &msg,
const WERD_CHOICE *choice, bool debug) {
incorrect_result_reason_ = irr;
debug_ = IncorrectReason();
debug_ += " to blame: ";
FillDebugString(msg, choice, &debug_);
if (debug) tprintf("SetBlame(): %s", debug_.string());
}
private:
// Set to true when bounding boxes for individual unichars are recorded.
bool truth_has_char_boxes_;
// The truth_word_ (in the original image coordinate space) contains ground
// truth bounding boxes for this WERD_RES.
tesseract::BoxWord truth_word_;
// Same as above, but in normalized coordinates
// (filled in by WERD_RES::SetupForRecognition()).
tesseract::BoxWord norm_truth_word_;
// Tolerance for bounding box comparisons in normalized space.
int norm_box_tolerance_;
// Contains ground truth unichar for each of the bounding boxes in truth_word.
GenericVector<STRING> truth_text_;
// The reason for incorrect OCR result.
IncorrectResultReason incorrect_result_reason_;
// Debug text associated with the blame.
STRING debug_;
// Misadaption debug information (filled in if this word was misadapted to).
STRING misadaption_debug_;
// Variables used by the segmentation search when looking for the blame.
// Set to true while segmentation search is continued after the usual
// termination condition in order to look for the blame.
bool segsearch_is_looking_for_blame_;
// Best rating for correctly segmented path
// (set and used by SegSearch when looking for blame).
float best_correctly_segmented_rating_;
// Vectors populated by SegSearch to indicate column and row indices that
// correspond to blobs with correct bounding boxes.
GenericVector<int> correct_segmentation_cols_;
GenericVector<int> correct_segmentation_rows_;
// Set to true if best choice is a dictionary word and
// classifier's top choice.
bool best_choice_is_dict_and_top_choice_;
// Serialized segmentation search lattice.
char *lattice_data_;
int lattice_size_; // size of lattice_data in bytes
// Information about hypotheses (paths) explored by the segmentation search.
tesseract::ParamsTrainingBundle params_training_bundle_;
};
#endif // TESSERACT_CCSTRUCT_BLAMER_H_
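// ---------------------------------------------------------------------------
// Illustrative sketch of the guided segmentation-search lifecycle (editor's
// note, not part of the original source). The ratings matrix, wildcard id,
// debug flag and pain-point callback are assumed to be supplied by the
// caller's recognition code:
//
//   BlamerBundle* bb = word->blamer_bundle;
//   if (bb != NULL && bb->GuidedSegsearchNeeded(word->best_choice)) {
//     STRING debug_str;
//     bb->InitForSegSearch(word->best_choice, ratings, wildcard_id,
//                          debug, &debug_str, pain_point_cb);
//     // ...the segmentation search keeps exploring while
//     // bb->GuidedSegsearchStillGoing() returns true...
//     bb->FinishSegSearch(word->best_choice, debug, &debug_str);
//   }
//   BlamerBundle::LastChanceBlame(debug, word);  // backup blame if still unset
// ---------------------------------------------------------------------------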

View File

@ -0,0 +1,29 @@
/**********************************************************************
* File: blckerr.h (Formerly blockerr.h)
* Description: Error codes for the page block classes.
* Author: Ray Smith
* Created: Tue Mar 19 17:43:30 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef BLCKERR_H
#define BLCKERR_H
#include "errcode.h"
const ERRCODE BADBLOCKLINE = "Y coordinate in block out of bounds";
const ERRCODE LOSTBLOCKLINE = "Can't find rectangle for line";
const ERRCODE ILLEGAL_GRADIENT = "Gradient wrong side of edge step!";
const ERRCODE WRONG_WORD = "Word doesn't have blobs of that type";
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,847 @@
/**********************************************************************
* File: blobbox.h (Formerly blobnbox.h)
* Description: Code for the textord blob class.
* Author: Ray Smith
* Created: Thu Jul 30 09:08:51 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef BLOBBOX_H
#define BLOBBOX_H
#include "clst.h"
#include "elst2.h"
#include "werd.h"
#include "ocrblock.h"
#include "statistc.h"
enum PITCH_TYPE
{
PITCH_DUNNO, // insufficient data
PITCH_DEF_FIXED, // definitely fixed
PITCH_MAYBE_FIXED, // could be
PITCH_DEF_PROP,
PITCH_MAYBE_PROP,
PITCH_CORR_FIXED,
PITCH_CORR_PROP
};
// The possible tab-stop types of each side of a BLOBNBOX.
// The ordering is important, as it is used for deleting dead-ends in the
// search. ALIGNED, CONFIRMED and VLINE should remain greater than the
// non-aligned, unset, or deleted members.
enum TabType {
TT_NONE, // Not a tab.
TT_DELETED, // Not a tab after detailed analysis.
TT_MAYBE_RAGGED, // Initial designation of a tab-stop candidate.
TT_MAYBE_ALIGNED, // Initial designation of a tab-stop candidate.
TT_CONFIRMED, // Aligned with neighbours.
TT_VLINE // Detected as a vertical line.
};
// The possible region types of a BLOBNBOX.
// Note: keep all the text types > BRT_UNKNOWN and all the image types less.
// Keep in sync with kBlobTypes in colpartition.cpp and BoxColor, and the
// *Type static functions below.
enum BlobRegionType {
BRT_NOISE, // Neither text nor image.
BRT_HLINE, // Horizontal separator line.
BRT_VLINE, // Vertical separator line.
BRT_RECTIMAGE, // Rectangular image.
BRT_POLYIMAGE, // Non-rectangular image.
BRT_UNKNOWN, // Not determined yet.
BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented.
BRT_TEXT, // Convincing text.
BRT_COUNT // Number of possibilities.
};
// enum for elements of arrays that refer to neighbours.
// NOTE: keep in this order, so ^2 can be used to flip direction.
enum BlobNeighbourDir {
BND_LEFT,
BND_BELOW,
BND_RIGHT,
BND_ABOVE,
BND_COUNT
};
// enum for special type of text characters, such as math symbol or italic.
enum BlobSpecialTextType {
BSTT_NONE, // No special.
BSTT_ITALIC, // Italic style.
BSTT_DIGIT, // Digit symbols.
BSTT_MATH, // Mathematical symbols (not including digits).
BSTT_UNCLEAR, // Characters with low recognition rate.
BSTT_SKIP, // Characters that we skip labeling (usually too small).
BSTT_COUNT
};
inline BlobNeighbourDir DirOtherWay(BlobNeighbourDir dir) {
return static_cast<BlobNeighbourDir>(dir ^ 2);
}
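// With the enum order above (LEFT=0, BELOW=1, RIGHT=2, ABOVE=3), XOR with 2
// flips the direction: DirOtherWay(BND_LEFT) == BND_RIGHT (0 ^ 2 == 2) and
// DirOtherWay(BND_BELOW) == BND_ABOVE (1 ^ 2 == 3).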
// BlobTextFlowType indicates the quality of neighbouring information
// related to a chain of connected components, either horizontally or
// vertically. Also used by ColPartition for the collection of blobs
// within, which should all have the same value in most cases.
enum BlobTextFlowType {
BTFT_NONE, // No text flow set yet.
BTFT_NONTEXT, // Flow too poor to be likely text.
BTFT_NEIGHBOURS, // Neighbours support flow in this direction.
BTFT_CHAIN, // There is a weak chain of text in this direction.
BTFT_STRONG_CHAIN, // There is a strong chain of text in this direction.
BTFT_TEXT_ON_IMAGE, // There is a strong chain of text on an image.
BTFT_LEADER, // Leader dots/dashes etc.
BTFT_COUNT
};
// Returns true if type1 dominates type2 in a merge. Mostly determined by the
// ordering of the enum, LEADER is weak and dominates nothing.
// The function is anti-symmetric (t1 > t2) === !(t2 > t1), except that
// this cannot be true if t1 == t2, so the result is undefined.
inline bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2) {
// LEADER always loses.
if (type1 == BTFT_LEADER) return false;
if (type2 == BTFT_LEADER) return true;
// With those out of the way, the ordering of the enum determines the result.
return type1 >= type2;
}
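// For example, DominatesInMerge(BTFT_STRONG_CHAIN, BTFT_CHAIN) is true and
// DominatesInMerge(BTFT_LEADER, BTFT_NONTEXT) is false: LEADER always loses,
// otherwise the greater (or equal) enum value dominates.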
namespace tesseract {
class ColPartition;
}
class BLOBNBOX;
ELISTIZEH(BLOBNBOX)
class BLOBNBOX :public ELIST_LINK
{
public:
BLOBNBOX() {
ConstructionInit();
}
explicit BLOBNBOX(C_BLOB *srcblob) {
box = srcblob->bounding_box();
ConstructionInit();
cblob_ptr = srcblob;
area = static_cast<int>(srcblob->area());
}
~BLOBNBOX() {
if (owns_cblob_) delete cblob_ptr;
}
static BLOBNBOX* RealBlob(C_OUTLINE* outline) {
C_BLOB* blob = new C_BLOB(outline);
return new BLOBNBOX(blob);
}
// Rotates the box and the underlying blob.
void rotate(FCOORD rotation);
// Methods that act on the box without touching the underlying blob.
// Reflect the box in the y-axis, leaving the underlying blob untouched.
void reflect_box_in_y_axis();
// Rotates the box by the angle given by rotation.
// If the blob is a diacritic, then only small rotations for skew
// correction can be applied.
void rotate_box(FCOORD rotation);
// Moves just the box by the given vector.
void translate_box(ICOORD v) {
if (IsDiacritic()) {
box.move(v);
base_char_top_ += v.y();
base_char_bottom_ += v.y();
}
else {
box.move(v);
set_diacritic_box(box);
}
}
void merge(BLOBNBOX *nextblob);
void really_merge(BLOBNBOX* other);
void chop( // fake chop blob
BLOBNBOX_IT *start_it, // location of this
BLOBNBOX_IT *blob_it, // iterator
FCOORD rotation, // for landscape
float xheight); // line height
void NeighbourGaps(int gaps[BND_COUNT]) const;
void MinMaxGapsClipped(int* h_min, int* h_max,
int* v_min, int* v_max) const;
void CleanNeighbours();
// Returns positive if there is at least one side neighbour that has a
// similar stroke width and is not on the other side of a rule line.
int GoodTextBlob() const;
// Returns the number of side neighbours that are of type BRT_NOISE.
int NoisyNeighbours() const;
// Returns true if the blob is noise and has no owner.
bool DeletableNoise() const {
return owner() == NULL && region_type() == BRT_NOISE;
}
// Returns true, and sets vert_possible/horz_possible if the blob has some
// feature that makes it individually appear to flow one way.
// eg if it has a high aspect ratio, yet has a complex shape, such as a
// joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1.
bool DefiniteIndividualFlow();
// Returns true if there is no tabstop violation in merging this and other.
bool ConfirmNoTabViolation(const BLOBNBOX& other) const;
// Returns true if other has a similar stroke width to this.
bool MatchingStrokeWidth(const BLOBNBOX& other,
double fractional_tolerance,
double constant_tolerance) const;
// Returns a bounding box of the outline contained within the
// given horizontal range.
TBOX BoundsWithinLimits(int left, int right);
// Estimates and stores the baseline position based on the shape of the
// outline.
void EstimateBaselinePosition();
// Simple accessors.
const TBOX& bounding_box() const {
return box;
}
// Set the bounding box. Use with caution.
// Normally use compute_bounding_box instead.
void set_bounding_box(const TBOX& new_box) {
box = new_box;
base_char_top_ = box.top();
base_char_bottom_ = box.bottom();
}
void compute_bounding_box() {
box = cblob_ptr->bounding_box();
base_char_top_ = box.top();
base_char_bottom_ = box.bottom();
baseline_y_ = box.bottom();
}
const TBOX& reduced_box() const {
return red_box;
}
void set_reduced_box(TBOX new_box) {
red_box = new_box;
reduced = TRUE;
}
inT32 enclosed_area() const {
return area;
}
bool joined_to_prev() const {
return joined != 0;
}
bool red_box_set() const {
return reduced != 0;
}
int repeated_set() const {
return repeated_set_;
}
void set_repeated_set(int set_id) {
repeated_set_ = set_id;
}
C_BLOB *cblob() const {
return cblob_ptr;
}
TabType left_tab_type() const {
return left_tab_type_;
}
void set_left_tab_type(TabType new_type) {
left_tab_type_ = new_type;
}
TabType right_tab_type() const {
return right_tab_type_;
}
void set_right_tab_type(TabType new_type) {
right_tab_type_ = new_type;
}
BlobRegionType region_type() const {
return region_type_;
}
void set_region_type(BlobRegionType new_type) {
region_type_ = new_type;
}
BlobSpecialTextType special_text_type() const {
return spt_type_;
}
void set_special_text_type(BlobSpecialTextType new_type) {
spt_type_ = new_type;
}
BlobTextFlowType flow() const {
return flow_;
}
void set_flow(BlobTextFlowType value) {
flow_ = value;
}
bool vert_possible() const {
return vert_possible_;
}
void set_vert_possible(bool value) {
vert_possible_ = value;
}
bool horz_possible() const {
return horz_possible_;
}
void set_horz_possible(bool value) {
horz_possible_ = value;
}
int left_rule() const {
return left_rule_;
}
void set_left_rule(int new_left) {
left_rule_ = new_left;
}
int right_rule() const {
return right_rule_;
}
void set_right_rule(int new_right) {
right_rule_ = new_right;
}
int left_crossing_rule() const {
return left_crossing_rule_;
}
void set_left_crossing_rule(int new_left) {
left_crossing_rule_ = new_left;
}
int right_crossing_rule() const {
return right_crossing_rule_;
}
void set_right_crossing_rule(int new_right) {
right_crossing_rule_ = new_right;
}
float horz_stroke_width() const {
return horz_stroke_width_;
}
void set_horz_stroke_width(float width) {
horz_stroke_width_ = width;
}
float vert_stroke_width() const {
return vert_stroke_width_;
}
void set_vert_stroke_width(float width) {
vert_stroke_width_ = width;
}
float area_stroke_width() const {
return area_stroke_width_;
}
tesseract::ColPartition* owner() const {
return owner_;
}
void set_owner(tesseract::ColPartition* new_owner) {
owner_ = new_owner;
}
bool leader_on_left() const {
return leader_on_left_;
}
void set_leader_on_left(bool flag) {
leader_on_left_ = flag;
}
bool leader_on_right() const {
return leader_on_right_;
}
void set_leader_on_right(bool flag) {
leader_on_right_ = flag;
}
BLOBNBOX* neighbour(BlobNeighbourDir n) const {
return neighbours_[n];
}
bool good_stroke_neighbour(BlobNeighbourDir n) const {
return good_stroke_neighbours_[n];
}
void set_neighbour(BlobNeighbourDir n, BLOBNBOX* neighbour, bool good) {
neighbours_[n] = neighbour;
good_stroke_neighbours_[n] = good;
}
bool IsDiacritic() const {
return base_char_top_ != box.top() || base_char_bottom_ != box.bottom();
}
int base_char_top() const {
return base_char_top_;
}
int base_char_bottom() const {
return base_char_bottom_;
}
int baseline_position() const {
return baseline_y_;
}
int line_crossings() const {
return line_crossings_;
}
void set_line_crossings(int value) {
line_crossings_ = value;
}
void set_diacritic_box(const TBOX& diacritic_box) {
base_char_top_ = diacritic_box.top();
base_char_bottom_ = diacritic_box.bottom();
}
BLOBNBOX* base_char_blob() const {
return base_char_blob_;
}
void set_base_char_blob(BLOBNBOX* blob) {
base_char_blob_ = blob;
}
void set_owns_cblob(bool value) { owns_cblob_ = value; }
bool UniquelyVertical() const {
return vert_possible_ && !horz_possible_;
}
bool UniquelyHorizontal() const {
return horz_possible_ && !vert_possible_;
}
// Returns true if the region type is text.
static bool IsTextType(BlobRegionType type) {
return type == BRT_TEXT || type == BRT_VERT_TEXT;
}
// Returns true if the region type is image.
static bool IsImageType(BlobRegionType type) {
return type == BRT_RECTIMAGE || type == BRT_POLYIMAGE;
}
// Returns true if the region type is line.
static bool IsLineType(BlobRegionType type) {
return type == BRT_HLINE || type == BRT_VLINE;
}
// Returns true if the region type cannot be merged.
static bool UnMergeableType(BlobRegionType type) {
return IsLineType(type) || IsImageType(type);
}
// Helper to call CleanNeighbours on all blobs on the list.
static void CleanNeighbours(BLOBNBOX_LIST* blobs);
// Helper to delete all the deletable blobs on the list.
static void DeleteNoiseBlobs(BLOBNBOX_LIST* blobs);
// Helper to compute edge offsets for all the blobs on the list.
// See coutln.h for an explanation of edge offsets.
static void ComputeEdgeOffsets(Pix* thresholds, Pix* grey,
BLOBNBOX_LIST* blobs);
#ifndef GRAPHICS_DISABLED
// Helper to draw all the blobs on the list in the given body_colour,
// with child outlines in the child_colour.
static void PlotBlobs(BLOBNBOX_LIST* list,
ScrollView::Color body_colour,
ScrollView::Color child_colour,
ScrollView* win);
// Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
// given list in the given body_colour, with child outlines in the
// child_colour.
static void PlotNoiseBlobs(BLOBNBOX_LIST* list,
ScrollView::Color body_colour,
ScrollView::Color child_colour,
ScrollView* win);
static ScrollView::Color TextlineColor(BlobRegionType region_type,
BlobTextFlowType flow_type);
// Keep in sync with BlobRegionType.
ScrollView::Color BoxColor() const;
void plot(ScrollView* window, // window to draw in
ScrollView::Color blob_colour, // for outer bits
ScrollView::Color child_colour); // for holes
#endif
// Initializes the bulk of the members to default values for use at
// construction time.
void ConstructionInit() {
cblob_ptr = NULL;
owns_cblob_ = false;
area = 0;
area_stroke_width_ = 0.0f;
horz_stroke_width_ = 0.0f;
vert_stroke_width_ = 0.0f;
ReInit();
}
// Initializes members set by StrokeWidth and beyond, without discarding
// stored area and strokewidth values, which are expensive to calculate.
void ReInit() {
joined = false;
reduced = false;
repeated_set_ = 0;
left_tab_type_ = TT_NONE;
right_tab_type_ = TT_NONE;
region_type_ = BRT_UNKNOWN;
flow_ = BTFT_NONE;
spt_type_ = BSTT_SKIP;
left_rule_ = 0;
right_rule_ = 0;
left_crossing_rule_ = 0;
right_crossing_rule_ = 0;
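// Rough stroke-width estimate: for a long thin stroke of length L and
// width w, area ~= L * w and perimeter ~= 2 * L, so w ~= 2 * area / perimeter.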
if (area_stroke_width_ == 0.0f && area > 0 && cblob() != NULL)
area_stroke_width_ = 2.0f * area / cblob()->perimeter();
owner_ = NULL;
base_char_top_ = box.top();
base_char_bottom_ = box.bottom();
baseline_y_ = box.bottom();
line_crossings_ = 0;
base_char_blob_ = NULL;
horz_possible_ = false;
vert_possible_ = false;
leader_on_left_ = false;
leader_on_right_ = false;
ClearNeighbours();
}
void ClearNeighbours() {
for (int n = 0; n < BND_COUNT; ++n) {
neighbours_[n] = NULL;
good_stroke_neighbours_[n] = false;
}
}
private:
C_BLOB *cblob_ptr; // edgestep blob
TBOX box; // bounding box
TBOX red_box; // bounding box
int area : 30; // enclosed area
int joined : 1; // joined to prev
int reduced : 1; // reduced box set
int repeated_set_; // id of the set of repeated blobs
TabType left_tab_type_; // Indicates tab-stop assessment
TabType right_tab_type_; // Indicates tab-stop assessment
BlobRegionType region_type_; // Type of region this blob belongs to
BlobTextFlowType flow_; // Quality of text flow.
inT16 left_rule_; // x-coord of nearest but not crossing rule line
inT16 right_rule_; // x-coord of nearest but not crossing rule line
inT16 left_crossing_rule_; // x-coord of nearest or crossing rule line
inT16 right_crossing_rule_; // x-coord of nearest or crossing rule line
inT16 base_char_top_; // y-coord of top/bottom of diacritic base,
inT16 base_char_bottom_; // if it exists else top/bottom of this blob.
inT16 baseline_y_; // Estimate of baseline position.
int line_crossings_; // Number of line intersections touched.
BLOBNBOX* base_char_blob_; // The blob that was the base char.
float horz_stroke_width_; // Median horizontal stroke width
float vert_stroke_width_; // Median vertical stroke width
float area_stroke_width_; // Stroke width from area/perimeter ratio.
tesseract::ColPartition* owner_; // Who will delete me when I am not needed
BlobSpecialTextType spt_type_; // Special text type.
BLOBNBOX* neighbours_[BND_COUNT];
bool good_stroke_neighbours_[BND_COUNT];
bool horz_possible_; // Could be part of horizontal flow.
bool vert_possible_; // Could be part of vertical flow.
bool leader_on_left_; // There is a leader to the left.
bool leader_on_right_; // There is a leader to the right.
// Iff true, then the destructor should delete the cblob_ptr.
// TODO(rays) migrate all uses to correctly setting this flag instead of
// deleting the C_BLOB before deleting the BLOBNBOX.
bool owns_cblob_;
};
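// Illustrative sketch, not part of the original header: ReInit() above
// estimates stroke width from the area/perimeter ratio as
// 2 * area / perimeter. For a long, thin stroke approximated by an L x w
// rectangle (L >> w), area = L * w and perimeter = 2 * (L + w) ~= 2 * L,
// so 2 * area / perimeter ~= w. The helper below uses our own name and
// plain floats, purely for illustration.
static inline float EstimateStrokeWidth(float area, float perimeter) {
  return perimeter > 0.0f ? 2.0f * area / perimeter : 0.0f;
}
// Example: a 100 x 3 stroke has area 300 and perimeter 206, giving
// 600 / 206 ~= 2.9, close to the true width of 3.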
class TO_ROW : public ELIST2_LINK
{
public:
static const int kErrorWeight = 3;
TO_ROW() {
clear();
} //empty
TO_ROW( //constructor
BLOBNBOX *blob, //from first blob
float top, //of row
float bottom,
float row_size); //target height
void print() const;
float max_y() const { //access function
return y_max;
}
float min_y() const {
return y_min;
}
float mean_y() const {
return (y_min + y_max) / 2.0f;
}
float initial_min_y() const {
return initial_y_min;
}
float line_m() const { //access to line fit
return m;
}
float line_c() const {
return c;
}
float line_error() const {
return error;
}
float parallel_c() const {
return para_c;
}
float parallel_error() const {
return para_error;
}
float believability() const { //baseline goodness
return credibility;
}
float intercept() const { //real parallel_c
return y_origin;
}
void add_blob( //put in row
BLOBNBOX *blob, //blob to add
float top, //of row
float bottom,
float row_size); //target height
void insert_blob( //put in row in order
BLOBNBOX *blob);
BLOBNBOX_LIST *blob_list() { //get list
return &blobs;
}
void set_line( //set line spec
float new_m, //line to set
float new_c,
float new_error) {
m = new_m;
c = new_c;
error = new_error;
}
void set_parallel_line( //set fixed gradient line
float gradient, //page gradient
float new_c,
float new_error) {
para_c = new_c;
para_error = new_error;
credibility =
(float)(blobs.length() - kErrorWeight * new_error);
y_origin = (float)(new_c / sqrt(1 + gradient * gradient));
//real intercept
}
void set_limits( //set min,max
float new_min, //bottom and
float new_max) { //top of row
y_min = new_min;
y_max = new_max;
}
void compute_vertical_projection();
//get projection
bool rep_chars_marked() const {
return num_repeated_sets_ != -1;
}
void clear_rep_chars_marked() {
num_repeated_sets_ = -1;
}
int num_repeated_sets() const {
return num_repeated_sets_;
}
void set_num_repeated_sets(int num_sets) {
num_repeated_sets_ = num_sets;
}
// true when dead
BOOL8 merged;
BOOL8 all_caps; // had no ascenders
BOOL8 used_dm_model; // in guessing pitch
inT16 projection_left; // start of projection
inT16 projection_right; // start of projection
PITCH_TYPE pitch_decision; // how strong is decision
float fixed_pitch; // pitch or 0
float fp_space; // sp if fixed pitch
float fp_nonsp; // nonsp if fixed pitch
float pr_space; // sp if prop
float pr_nonsp; // non sp if prop
float spacing; // to "next" row
float xheight; // of line
int xheight_evidence; // number of blobs of height xheight
float ascrise; // ascenders
float descdrop; // descenders
float body_size; // of CJK characters. Assumed to be
// xheight+ascrise for non-CJK text.
inT32 min_space; // min size for real space
inT32 max_nonspace; // max size of non-space
inT32 space_threshold; // space vs nonspace
float kern_size; // average non-space
float space_size; // average space
WERD_LIST rep_words; // repeated chars
ICOORDELT_LIST char_cells; // fixed pitch cells
QSPLINE baseline; // curved baseline
STATS projection; // vertical projection
private:
void clear(); // clear all values to reasonable defaults
BLOBNBOX_LIST blobs; //blobs in row
float y_min; //coords
float y_max;
float initial_y_min;
float m, c; //line spec
float error; //line error
float para_c; //constrained fit
float para_error;
float y_origin; //rotated para_c;
float credibility; //baseline believability
int num_repeated_sets_; // number of sets of repeated blobs
// set to -1 if we have not searched
// for repeated blobs in this row yet
};
ELIST2IZEH(TO_ROW)
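// Illustrative sketch, not part of the original header: set_parallel_line()
// above stores y_origin = c / sqrt(1 + gradient^2). For a baseline modelled
// as y = gradient * x + c, this is the intercept measured after rotating the
// page gradient away (the "real intercept" of the comment). It also scores
// the fit as credibility = blob count - kErrorWeight * error. The helper
// below uses our own name, purely for illustration.
#include <math.h>  // for sqrt (illustration only)
static inline float RotatedIntercept(float gradient, float c) {
  return (float)(c / sqrt(1.0 + gradient * gradient));
}
// Example: gradient = 0 gives back c; gradient = 1 and c = 10 give
// 10 / sqrt(2) ~= 7.07.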
class TO_BLOCK :public ELIST_LINK
{
public:
TO_BLOCK() : pitch_decision(PITCH_DUNNO) {
clear();
} //empty
TO_BLOCK( //constructor
BLOCK *src_block); //real block
~TO_BLOCK();
void clear(); // clear all scalar members.
TO_ROW_LIST *get_rows() { //access function
return &row_list;
}
// Rotate all the blobnbox lists and the underlying block. Then update the
// median size statistic from the blobs list.
void rotate(const FCOORD& rotation) {
BLOBNBOX_LIST* blobnbox_list[] = { &blobs, &underlines, &noise_blobs,
&small_blobs, &large_blobs, NULL };
for (BLOBNBOX_LIST** list = blobnbox_list; *list != NULL; ++list) {
BLOBNBOX_IT it(*list);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
it.data()->rotate(rotation);
}
}
// Rotate the block
ASSERT_HOST(block->poly_block() != NULL);
block->rotate(rotation);
// Update the median size statistic from the blobs list.
STATS widths(0, block->bounding_box().width());
STATS heights(0, block->bounding_box().height());
BLOBNBOX_IT blob_it(&blobs);
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
widths.add(blob_it.data()->bounding_box().width(), 1);
heights.add(blob_it.data()->bounding_box().height(), 1);
}
block->set_median_size(static_cast<int>(widths.median() + 0.5),
static_cast<int>(heights.median() + 0.5));
}
void print_rows() { //debug info
TO_ROW_IT row_it = &row_list;
TO_ROW *row;
for (row_it.mark_cycle_pt(); !row_it.cycled_list();
row_it.forward()) {
row = row_it.data();
tprintf("Row range (%g,%g), para_c=%g, blobcount=" INT32FORMAT
"\n", row->min_y(), row->max_y(), row->parallel_c(),
row->blob_list()->length());
}
}
// Reorganizes the blob lists with a different definition of small, medium
// and large, compared to the original definition.
// Height is still the primary filter key, but medium width blobs of small
// height become medium, and very wide blobs of small height stay small.
void ReSetAndReFilterBlobs();
// Deletes noise blobs from all lists where not owned by a ColPartition.
void DeleteUnownedNoise();
// Computes and stores the edge offsets on each blob for use in feature
// extraction, using greyscale if the supplied grey and thresholds pixes
// are 8-bit or otherwise (if NULL or not 8 bit) the original binary
// edge step outlines.
// Thresholds must either be the same size as grey or an integer down-scale
// of grey.
// See coutln.h for an explanation of edge offsets.
void ComputeEdgeOffsets(Pix* thresholds, Pix* grey);
#ifndef GRAPHICS_DISABLED
// Draw the noise blobs from all lists in red.
void plot_noise_blobs(ScrollView* to_win);
// Draw the blobs on the various lists in the block in different colors.
void plot_graded_blobs(ScrollView* to_win);
#endif
BLOBNBOX_LIST blobs; //medium size
BLOBNBOX_LIST underlines; //underline blobs
BLOBNBOX_LIST noise_blobs; //very small
BLOBNBOX_LIST small_blobs; //fairly small
BLOBNBOX_LIST large_blobs; //big blobs
BLOCK *block; //real block
PITCH_TYPE pitch_decision; //how strong is decision
float line_spacing; //estimate
// line_size is a lower-bound estimate of the font size in pixels of
// the text in the block (with ascenders and descenders), being a small
// (1.25) multiple of the median height of filtered blobs.
// In most cases the font size will be bigger, but it will be closer
// if the text is allcaps, or in a no-x-height script.
float line_size; //estimate
float max_blob_size; //line assignment limit
float baseline_offset; //phase shift
float xheight; //median blob size
float fixed_pitch; //pitch or 0
float kern_size; //average non-space
float space_size; //average space
inT32 min_space; //min definite space
inT32 max_nonspace; //max definite
float fp_space; //sp if fixed pitch
float fp_nonsp; //nonsp if fixed pitch
float pr_space; //sp if prop
float pr_nonsp; //non sp if prop
TO_ROW *key_row; //starting row
private:
TO_ROW_LIST row_list; //temporary rows
};
ELISTIZEH(TO_BLOCK)
extern double_VAR_H(textord_error_weight, 3,
"Weighting for error in believability");
void find_cblob_limits( //get y limits
C_BLOB *blob, //blob to search
float leftx, //x limits
float rightx,
FCOORD rotation, //for landscape
float &ymin, //output y limits
float &ymax);
void find_cblob_vlimits( //get y limits
C_BLOB *blob, //blob to search
float leftx, //x limits
float rightx,
float &ymin, //output y limits
float &ymax);
void find_cblob_hlimits( //get x limits
C_BLOB *blob, //blob to search
float bottomy, //y limits
float topy,
float &xmin, //output x limits
float &xmax); //output x limits
C_BLOB *crotate_cblob( //rotate it
C_BLOB *blob, //blob to search
FCOORD rotation //for landscape
);
TBOX box_next( //get bounding box
BLOBNBOX_IT *it //iterator to blobs
);
TBOX box_next_pre_chopped( //get bounding box
BLOBNBOX_IT *it //iterator to blobs
);
void vertical_cblob_projection( //project outlines
C_BLOB *blob, //blob to project
STATS *stats //output
);
void vertical_coutline_projection( //project outlines
C_OUTLINE *outline, //outline to project
STATS *stats //output
);
#ifndef GRAPHICS_DISABLED
void plot_blob_list(ScrollView* win, // window to draw in
BLOBNBOX_LIST *list, // blob list
ScrollView::Color body_colour, // colour to draw
ScrollView::Color child_colour); // colour of child
#endif // GRAPHICS_DISABLED
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,449 @@
/* -*-C-*-
********************************************************************************
*
* File: blobs.h (Formerly blobs.h)
* Description: Blob definition
* Author: Mark Seaman, OCR Technology
* Created: Fri Oct 27 15:39:52 1989
* Modified: Thu Mar 28 15:33:38 1991 (Mark Seaman) marks@hpgrlt
* Language: C
* Package: N/A
* Status: Experimental (Do Not Distribute)
*
* (c) Copyright 1989, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*********************************************************************************/
#ifndef BLOBS_H
#define BLOBS_H
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#include "clst.h"
#include "normalis.h"
#include "publictypes.h"
#include "rect.h"
#include "vecfuncs.h"
class BLOCK;
class C_BLOB;
class C_OUTLINE;
class LLSQ;
class ROW;
class WERD;
/*----------------------------------------------------------------------
T y p e s
----------------------------------------------------------------------*/
#define EDGEPTFLAGS 4 /*concavity,length etc. */
struct TPOINT {
TPOINT(): x(0), y(0) {}
TPOINT(inT16 vx, inT16 vy) : x(vx), y(vy) {}
TPOINT(const ICOORD &ic) : x(ic.x()), y(ic.y()) {}
void operator+=(const TPOINT& other) {
x += other.x;
y += other.y;
}
void operator/=(int divisor) {
x /= divisor;
y /= divisor;
}
bool operator==(const TPOINT& other) const {
return x == other.x && y == other.y;
}
// Returns true when the two line segments cross each other.
// (Moved from outlines.cpp).
static bool IsCrossed(const TPOINT& a0, const TPOINT& a1, const TPOINT& b0,
const TPOINT& b1);
inT16 x; // absolute x coord.
inT16 y; // absolute y coord.
};
typedef TPOINT VECTOR; // structure for coordinates.
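// Illustrative sketch, not part of the original header: TPOINT::IsCrossed()
// declared above reports whether two line segments cross. The standalone
// helpers below (our own names) show the standard orientation test that such
// a predicate is typically built on: segments a0-a1 and b0-b1 cross when the
// endpoints of each segment lie strictly on opposite sides of the other.
// This is a generic sketch, not a copy of the Tesseract implementation.
struct PtI { int x, y; };
static inline int OrientSign(const PtI& a, const PtI& b, const PtI& c) {
  // Sign of the cross product (b - a) x (c - a): >0 left turn, <0 right turn.
  long v = (long)(b.x - a.x) * (c.y - a.y) - (long)(b.y - a.y) * (c.x - a.x);
  return (v > 0) - (v < 0);
}
static inline bool SegmentsCrossExample(const PtI& a0, const PtI& a1,
                                        const PtI& b0, const PtI& b1) {
  return OrientSign(a0, a1, b0) * OrientSign(a0, a1, b1) < 0 &&
         OrientSign(b0, b1, a0) * OrientSign(b0, b1, a1) < 0;
}
// Example: (0,0)-(2,2) crosses (0,2)-(2,0); (0,0)-(1,0) does not cross
// (0,1)-(1,1).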
struct EDGEPT {
EDGEPT()
: next(NULL), prev(NULL), src_outline(NULL), start_step(0), step_count(0) {
memset(flags, 0, EDGEPTFLAGS * sizeof(flags[0]));
}
EDGEPT(const EDGEPT& src) : next(NULL), prev(NULL) {
CopyFrom(src);
}
EDGEPT& operator=(const EDGEPT& src) {
CopyFrom(src);
return *this;
}
// Copies the data elements, but leaves the pointers untouched.
void CopyFrom(const EDGEPT& src) {
pos = src.pos;
vec = src.vec;
memcpy(flags, src.flags, EDGEPTFLAGS * sizeof(flags[0]));
src_outline = src.src_outline;
start_step = src.start_step;
step_count = src.step_count;
}
// Returns the squared distance between the points, with the x-component
// weighted by x_factor.
int WeightedDistance(const EDGEPT& other, int x_factor) const {
int x_dist = pos.x - other.pos.x;
int y_dist = pos.y - other.pos.y;
return x_dist * x_dist * x_factor + y_dist * y_dist;
}
// Returns true if the positions are equal.
bool EqualPos(const EDGEPT& other) const { return pos == other.pos; }
// Returns the bounding box of the outline segment from *this to *end.
// Ignores hidden edge flags.
TBOX SegmentBox(const EDGEPT* end) const {
TBOX box(pos.x, pos.y, pos.x, pos.y);
const EDGEPT* pt = this;
do {
pt = pt->next;
if (pt->pos.x < box.left()) box.set_left(pt->pos.x);
if (pt->pos.x > box.right()) box.set_right(pt->pos.x);
if (pt->pos.y < box.bottom()) box.set_bottom(pt->pos.y);
if (pt->pos.y > box.top()) box.set_top(pt->pos.y);
} while (pt != end && pt != this);
return box;
}
// Returns the area of the outline segment from *this to *end.
// Ignores hidden edge flags.
int SegmentArea(const EDGEPT* end) const {
int area = 0;
const EDGEPT* pt = this->next;
do {
TPOINT origin_vec(pt->pos.x - pos.x, pt->pos.y - pos.y);
area += CROSS(origin_vec, pt->vec);
pt = pt->next;
} while (pt != end && pt != this);
return area;
}
// Returns true if the number of points in the outline segment from *this to
// *end is less than min_points and false if we get back to *this first.
// Ignores hidden edge flags.
bool ShortNonCircularSegment(int min_points, const EDGEPT* end) const {
int count = 0;
const EDGEPT* pt = this;
do {
if (pt == end) return true;
pt = pt->next;
++count;
} while (pt != this && count <= min_points);
return false;
}
// Accessors to hide or reveal a cut edge from feature extractors.
void Hide() {
flags[0] = true;
}
void Reveal() {
flags[0] = false;
}
bool IsHidden() const {
return flags[0] != 0;
}
void MarkChop() {
flags[2] = true;
}
bool IsChopPt() const {
return flags[2] != 0;
}
TPOINT pos; // position
VECTOR vec; // vector to next point
// TODO(rays) Remove flags and replace with
// is_hidden, runlength, dir, and fixed. The only use
// of the flags other than is_hidden is in polyaprx.cpp.
char flags[EDGEPTFLAGS]; // concavity, length etc
EDGEPT* next; // anticlockwise element
EDGEPT* prev; // clockwise element
C_OUTLINE* src_outline; // Outline it came from.
// The following fields are not used if src_outline is NULL.
int start_step; // Location of pos in src_outline.
int step_count; // Number of steps used (may wrap around).
};
// For use in chop and findseam to keep a list of which EDGEPTs were inserted.
CLISTIZEH(EDGEPT);
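// Illustrative sketch, not part of the original header: SegmentArea() above
// accumulates cross products of a position offset with each edge vector,
// which is a shoelace-style area computation. The standalone helper below
// (our own names) shows the plain shoelace formula on a closed polygon:
// summing the cross products of consecutive vertices gives twice the signed
// area, positive for an anticlockwise loop.
struct VtxI { int x, y; };
static inline int TwiceSignedArea(const VtxI* pts, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    const VtxI& a = pts[i];
    const VtxI& b = pts[(i + 1) % n];
    sum += a.x * b.y - a.y * b.x;  // z-component of a x b
  }
  return sum;
}
// Example: the unit square (0,0),(1,0),(1,1),(0,1) gives 2, i.e. area 1.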
struct TESSLINE {
TESSLINE() : is_hole(false), loop(NULL), next(NULL) {}
TESSLINE(const TESSLINE& src) : loop(NULL), next(NULL) {
CopyFrom(src);
}
~TESSLINE() {
Clear();
}
TESSLINE& operator=(const TESSLINE& src) {
CopyFrom(src);
return *this;
}
// Consume the circular list of EDGEPTs to make a TESSLINE.
static TESSLINE* BuildFromOutlineList(EDGEPT* outline);
// Copies the data and the outline, but leaves next untouched.
void CopyFrom(const TESSLINE& src);
// Deletes owned data.
void Clear();
// Normalize in-place using the DENORM.
void Normalize(const DENORM& denorm);
// Rotates by the given rotation in place.
void Rotate(const FCOORD rotation);
// Moves by the given vec in place.
void Move(const ICOORD vec);
// Scales by the given factor in place.
void Scale(float factor);
// Sets up the start and vec members of the loop from the pos members.
void SetupFromPos();
// Recomputes the bounding box from the points in the loop.
void ComputeBoundingBox();
// Computes the min and max cross product of the outline points with the
// given vec and returns the results in min_xp and max_xp. Geometrically
// this is the left and right edge of the outline perpendicular to the
// given direction, but to get the distance units correct, you would
// have to divide by the modulus of vec.
void MinMaxCrossProduct(const TPOINT vec, int* min_xp, int* max_xp) const;
TBOX bounding_box() const;
// Returns true if *this and other have equal bounding boxes.
bool SameBox(const TESSLINE& other) const {
return topleft == other.topleft && botright == other.botright;
}
// Returns true if the given line segment crosses any outline of this blob.
bool SegmentCrosses(const TPOINT& pt1, const TPOINT& pt2) const {
if (Contains(pt1) && Contains(pt2)) {
EDGEPT* pt = loop;
do {
if (TPOINT::IsCrossed(pt1, pt2, pt->pos, pt->next->pos)) return true;
pt = pt->next;
} while (pt != loop);
}
return false;
}
// Returns true if the point is contained within the outline box.
bool Contains(const TPOINT& pt) const {
return topleft.x <= pt.x && pt.x <= botright.x &&
botright.y <= pt.y && pt.y <= topleft.y;
}
#ifndef GRAPHICS_DISABLED
void plot(ScrollView* window, ScrollView::Color color,
ScrollView::Color child_color);
#endif // GRAPHICS_DISABLED
// Returns the first outline point that has a different src_outline to its
// predecessor, or, if all the same, the lowest indexed point.
EDGEPT* FindBestStartPt() const;
int BBArea() const {
return (botright.x - topleft.x) * (topleft.y - botright.y);
}
TPOINT topleft; // Top left of loop.
TPOINT botright; // Bottom right of loop.
TPOINT start; // Start of loop.
bool is_hole; // True if this is a hole/child outline.
EDGEPT *loop; // Edgeloop.
TESSLINE *next; // Next outline in blob.
}; // Outline structure.
struct TBLOB {
TBLOB() : outlines(NULL) {}
TBLOB(const TBLOB& src) : outlines(NULL) {
CopyFrom(src);
}
~TBLOB() {
Clear();
}
TBLOB& operator=(const TBLOB& src) {
CopyFrom(src);
return *this;
}
// Factory to build a TBLOB from a C_BLOB with polygonal approximation along
// the way. If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB
// contain pointers to the input C_OUTLINEs that enable higher-resolution
// feature extraction that does not use the polygonal approximation.
static TBLOB* PolygonalCopy(bool allow_detailed_fx, C_BLOB* src);
// Factory builds a blob with no outlines, but copies the other member data.
static TBLOB* ShallowCopy(const TBLOB& src);
// Normalizes the blob for classification only if needed.
// (Normally this means a non-zero classify rotation.)
// If no Normalization is needed, then NULL is returned, and the input blob
// can be used directly. Otherwise a new TBLOB is returned which must be
// deleted after use.
TBLOB* ClassifyNormalizeIfNeeded() const;
// Copies the data and the outlines, but leaves next untouched.
void CopyFrom(const TBLOB& src);
// Deletes owned data.
void Clear();
// Sets up the built-in DENORM and normalizes the blob in-place.
// For parameters see DENORM::SetupNormalization, plus the inverse flag for
// this blob and the Pix for the full image.
void Normalize(const BLOCK* block,
const FCOORD* rotation,
const DENORM* predecessor,
float x_origin, float y_origin,
float x_scale, float y_scale,
float final_xshift, float final_yshift,
bool inverse, Pix* pix);
// Rotates by the given rotation in place.
void Rotate(const FCOORD rotation);
// Moves by the given vec in place.
void Move(const ICOORD vec);
// Scales by the given factor in place.
void Scale(float factor);
// Recomputes the bounding boxes of the outlines.
void ComputeBoundingBoxes();
// Returns the number of outlines.
int NumOutlines() const;
TBOX bounding_box() const;
// Returns true if the given line segment crosses any outline of this blob.
bool SegmentCrossesOutline(const TPOINT& pt1, const TPOINT& pt2) const {
for (const TESSLINE* outline = outlines; outline != NULL;
outline = outline->next) {
if (outline->SegmentCrosses(pt1, pt2)) return true;
}
return false;
}
// Returns true if the point is contained within any of the outline boxes.
bool Contains(const TPOINT& pt) const {
for (const TESSLINE* outline = outlines; outline != NULL;
outline = outline->next) {
if (outline->Contains(pt)) return true;
}
return false;
}
// Finds and deletes any duplicate outlines in this blob, without deleting
// their EDGEPTs.
void EliminateDuplicateOutlines();
// Swaps the outlines of *this and next if needed to keep the centers in
// increasing x.
void CorrectBlobOrder(TBLOB* next);
const DENORM& denorm() const {
return denorm_;
}
#ifndef GRAPHICS_DISABLED
void plot(ScrollView* window, ScrollView::Color color,
ScrollView::Color child_color);
#endif // GRAPHICS_DISABLED
int BBArea() const {
int total_area = 0;
for (TESSLINE* outline = outlines; outline != NULL; outline = outline->next)
total_area += outline->BBArea();
return total_area;
}
// Computes the center of mass and second moments for the old baseline and
// 2nd moment normalizations. Returns the outline length.
// The input denorm should be the normalizations that have been applied from
// the image to the current state of this TBLOB.
int ComputeMoments(FCOORD* center, FCOORD* second_moments) const;
// Computes the precise bounding box of the coords that are generated by
// GetEdgeCoords. This may be different from the bounding box of the polygon.
void GetPreciseBoundingBox(TBOX* precise_box) const;
// Adds edges to the given vectors.
// For all the edge steps in all the outlines, or polygonal approximation
// where there are no edge steps, collects the steps into x_coords/y_coords.
// x_coords is a collection of the x-coords of vertical edges for each
// y-coord starting at box.bottom().
// y_coords is a collection of the y-coords of horizontal edges for each
// x-coord starting at box.left().
// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
void GetEdgeCoords(const TBOX& box,
GenericVector<GenericVector<int> >* x_coords,
GenericVector<GenericVector<int> >* y_coords) const;
TESSLINE *outlines; // List of outlines in blob.
private: // TODO(rays) Someday the data members will be private too.
// For all the edge steps in all the outlines, or polygonal approximation
// where there are no edge steps, collects the steps into the bounding_box,
// llsq and/or the x_coords/y_coords. Both are used in different kinds of
// normalization.
// For a description of x_coords, y_coords, see GetEdgeCoords above.
void CollectEdges(const TBOX& box,
TBOX* bounding_box, LLSQ* llsq,
GenericVector<GenericVector<int> >* x_coords,
GenericVector<GenericVector<int> >* y_coords) const;
private:
// DENORM indicating the transformations that this blob has undergone so far.
DENORM denorm_;
}; // Blob structure.
struct TWERD {
TWERD() : latin_script(false) {}
TWERD(const TWERD& src) {
CopyFrom(src);
}
~TWERD() {
Clear();
}
TWERD& operator=(const TWERD& src) {
CopyFrom(src);
return *this;
}
// Factory to build a TWERD from a (C_BLOB) WERD, with polygonal
// approximation along the way.
static TWERD* PolygonalCopy(bool allow_detailed_fx, WERD* src);
// Baseline normalizes the blobs in-place, recording the normalization in the
// DENORMs in the blobs.
void BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, bool inverse,
float x_height, float baseline_shift, bool numeric_mode,
tesseract::OcrEngineMode hint,
const TBOX* norm_box,
DENORM* word_denorm);
// Copies the data and the blobs, but leaves next untouched.
void CopyFrom(const TWERD& src);
// Deletes owned data.
void Clear();
// Recomputes the bounding boxes of the blobs.
void ComputeBoundingBoxes();
// Returns the number of blobs in the word.
int NumBlobs() const {
return blobs.size();
}
TBOX bounding_box() const;
// Merges the blobs from start to end, not including end, and deletes
// the blobs between start and end.
void MergeBlobs(int start, int end);
void plot(ScrollView* window);
GenericVector<TBLOB*> blobs; // Blobs in word.
bool latin_script; // This word is in a latin-based script.
};
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
// TODO(rays) Make divisible_blob and divide_blobs members of TBLOB.
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT* location);
void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob,
const TPOINT& location);
#endif
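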

View File

@ -0,0 +1,71 @@
/**********************************************************************
* File: blread.cpp (Formerly pdread.c)
* Description: Friend function of BLOCK to read the uscan pd file.
* Author: Ray Smith
* Created: Mon Mar 18 14:39:00 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <stdlib.h>
#ifdef __UNIX__
#include <assert.h>
#endif
#include "scanutils.h"
#include "fileerr.h"
#include "blread.h"
#define UNLV_EXT ".uzn" // unlv zone file
/**********************************************************************
* read_unlv_file
*
* Read a whole unlv zone file to make a list of blocks.
**********************************************************************/
bool read_unlv_file( //read unlv zone file
STRING name, //basename of file
inT32 xsize, //image size
inT32 ysize, //image size
BLOCK_LIST *blocks //output list
) {
FILE *pdfp; //file pointer
BLOCK *block; //current block
int x; //current top-down coords
int y;
int width; //of current block
int height;
BLOCK_IT block_it = blocks; //block iterator
name += UNLV_EXT; //add extension
if ((pdfp = fopen (name.string (), "rb")) == NULL) {
return false; //didn't read one
} else {
while (tfscanf(pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) {
//make rect block
block = new BLOCK (name.string (), TRUE, 0, 0,
(inT16) x, (inT16) (ysize - y - height),
(inT16) (x + width), (inT16) (ysize - y));
//on end of list
block_it.add_to_end (block);
}
fclose(pdfp);
}
return true;
}
void FullPageBlock(int width, int height, BLOCK_LIST *blocks) {
BLOCK_IT block_it(blocks);
BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
block_it.add_to_end(block);
}
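// Illustrative sketch, not part of the original source: a .uzn line holds
// "x y width height label" with y in top-down coordinates, while BLOCK uses
// bottom-up coordinates, so read_unlv_file() above maps a zone to the box
// (x, ysize - y - height) .. (x + width, ysize - y). The helper below uses
// our own names and isolates that coordinate flip.
struct UznBox { int left, bottom, right, top; };
static UznBox UznToBottomUp(int x, int y, int width, int height, int ysize) {
  UznBox b;
  b.left = x;
  b.bottom = ysize - y - height;
  b.right = x + width;
  b.top = ysize - y;
  return b;
}
// Example: a 100x50 zone at (10, 20) in a 1000-pixel-high image becomes
// left=10, bottom=930, right=110, top=980.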

View File

@ -0,0 +1,33 @@
/**********************************************************************
* File: blread.h (Formerly pdread.h)
* Description: Friend function of BLOCK to read the uscan pd file.
* Author: Ray Smith
* Created: Mon Mar 18 14:39:00 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef BLREAD_H
#define BLREAD_H
#include "params.h"
#include "ocrblock.h"
bool read_unlv_file( //read unlv zone file
STRING name, //basename of file
inT32 xsize, //image size
inT32 ysize, //image size
BLOCK_LIST *blocks //output list
);
void FullPageBlock(int width, int height, BLOCK_LIST *blocks);
#endif

View File

@ -0,0 +1,235 @@
/**********************************************************************
* File: boxread.cpp
* Description: Read data from a box file.
* Author: Ray Smith
* Created: Fri Aug 24 17:47:23 PDT 2007
*
* (C) Copyright 2007, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "boxread.h"
#include <string.h>
#include "fileerr.h"
#include "rect.h"
#include "strngs.h"
#include "tprintf.h"
#include "unichar.h"
// Special char code used to identify multi-blob labels.
static const char* kMultiBlobLabelCode = "WordStr";
// Open the boxfile based on the given image filename.
FILE* OpenBoxFile(const STRING& fname) {
STRING filename = BoxFileName(fname);
FILE* box_file = NULL;
if (!(box_file = fopen(filename.string(), "rb"))) {
CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s",
filename.string());
}
return box_file;
}
// Reads all boxes from the given filename.
// Reads a specific target_page number if >= 0, or all pages otherwise.
// Skips blanks if skip_blanks is true.
// The UTF-8 label of the box is put in texts, and the full box definition as
// a string is put in box_texts, with the corresponding page number in pages.
// Each of the output vectors is optional (may be NULL).
// Returns false if no boxes are found.
bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename,
GenericVector<TBOX>* boxes,
GenericVector<STRING>* texts,
GenericVector<STRING>* box_texts,
GenericVector<int>* pages) {
GenericVector<char> box_data;
if (!tesseract::LoadDataFromFile(BoxFileName(filename), &box_data))
return false;
// Convert the array of bytes to a string, so it can be used by the parser.
box_data.push_back('\0');
return ReadMemBoxes(target_page, skip_blanks, &box_data[0], boxes, texts,
box_texts, pages);
}
// Reads all boxes from the string. Otherwise, as ReadAllBoxes.
bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data,
GenericVector<TBOX>* boxes,
GenericVector<STRING>* texts,
GenericVector<STRING>* box_texts,
GenericVector<int>* pages) {
STRING box_str(box_data);
GenericVector<STRING> lines;
box_str.split('\n', &lines);
if (lines.empty()) return false;
int num_boxes = 0;
for (int i = 0; i < lines.size(); ++i) {
int page = 0;
STRING utf8_str;
TBOX box;
if (!ParseBoxFileStr(lines[i].string(), &page, &utf8_str, &box)) {
continue;
}
if (skip_blanks && (utf8_str == " " || utf8_str == "\t")) continue;
if (target_page >= 0 && page != target_page) continue;
if (boxes != NULL) boxes->push_back(box);
if (texts != NULL) texts->push_back(utf8_str);
if (box_texts != NULL) {
STRING full_text;
MakeBoxFileStr(utf8_str.string(), box, target_page, &full_text);
box_texts->push_back(full_text);
}
if (pages != NULL) pages->push_back(page);
++num_boxes;
}
return num_boxes > 0;
}
// Returns the box file name corresponding to the given image_filename.
STRING BoxFileName(const STRING& image_filename) {
STRING box_filename = image_filename;
const char *lastdot = strrchr(box_filename.string(), '.');
if (lastdot != NULL)
box_filename.truncate_at(lastdot - box_filename.string());
box_filename += ".box";
return box_filename;
}
// TODO(rays) convert all uses of ReadNextBox to use the new ReadAllBoxes.
// Box files are used ONLY DURING TRAINING, but by both processes of
// creating tr files with tesseract, and unicharset_extractor.
// ReadNextBox factors out the code to interpret a line of a box
// file so that applybox and unicharset_extractor interpret the same way.
// This function returns the next valid box file utf8 string and coords
// and returns true, or false on eof (and closes the file).
// It ignores the utf8 file signature ByteOrderMark (U+FEFF=EF BB BF), checks
// for valid utf-8 and allows space or tab between fields.
// utf8_str is set with the unichar string, and bounding box with the box.
// If there are page numbers in the file, it reads them all.
bool ReadNextBox(int *line_number, FILE* box_file,
STRING* utf8_str, TBOX* bounding_box) {
return ReadNextBox(-1, line_number, box_file, utf8_str, bounding_box);
}
// As ReadNextBox above, but get a specific page number. (0-based)
// Use -1 to read any page number. Files without page number all
// read as if they are page 0.
bool ReadNextBox(int target_page, int *line_number, FILE* box_file,
STRING* utf8_str, TBOX* bounding_box) {
int page = 0;
char buff[kBoxReadBufSize]; // boxfile read buffer
char *buffptr = buff;
while (fgets(buff, sizeof(buff) - 1, box_file)) {
(*line_number)++;
buffptr = buff;
const unsigned char *ubuf = reinterpret_cast<const unsigned char*>(buffptr);
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf)
buffptr += 3; // Skip unicode file designation.
// Check for blank lines in box file
if (*buffptr == '\n' || *buffptr == '\0') continue;
// Skip blank boxes.
if (*buffptr == ' ' || *buffptr == '\t') continue;
if (*buffptr != '\0') {
if (!ParseBoxFileStr(buffptr, &page, utf8_str, bounding_box)) {
tprintf("Box file format error on line %i; ignored\n", *line_number);
continue;
}
if (target_page >= 0 && target_page != page)
continue; // Not on the appropriate page.
return true; // Successfully read a box.
}
}
fclose(box_file);
return false; // EOF
}
// Parses the given box file string into a page_number, utf8_str, and
// bounding_box. Returns true on a successful parse.
// The box file is assumed to contain box definitions, one per line, of the
// following format for blob-level boxes:
// <UTF8 str> <left> <bottom> <right> <top> <page id>
// and for word/line-level boxes:
// WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
// See applybox.cpp for more information.
bool ParseBoxFileStr(const char* boxfile_str, int* page_number,
STRING* utf8_str, TBOX* bounding_box) {
*bounding_box = TBOX(); // Initialize it to empty.
*utf8_str = "";
char uch[kBoxReadBufSize];
const char *buffptr = boxfile_str;
// Read the unichar without messing up on Tibetan.
// According to issue 253 the utf-8 surrogates 85 and A0 are treated
// as whitespace by sscanf, so it is more reliable to just find
// ascii space and tab.
int uch_len = 0;
// Skip unicode file designation, if present.
const unsigned char *ubuf = reinterpret_cast<const unsigned char*>(buffptr);
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf)
buffptr += 3;
// Allow a single blank as the UTF-8 string. Check for empty string and
// then blindly eat the first character.
if (*buffptr == '\0') return false;
do {
uch[uch_len++] = *buffptr++;
} while (*buffptr != '\0' && *buffptr != ' ' && *buffptr != '\t' &&
uch_len < kBoxReadBufSize - 1);
uch[uch_len] = '\0';
if (*buffptr != '\0') ++buffptr;
int x_min, y_min, x_max, y_max;
*page_number = 0;
int count = sscanf(buffptr, "%d %d %d %d %d",
&x_min, &y_min, &x_max, &y_max, page_number);
if (count != 5 && count != 4) {
tprintf("Bad box coordinates in boxfile string! %s\n", ubuf);
return false;
}
// Test for long space-delimited string label.
if (strcmp(uch, kMultiBlobLabelCode) == 0 &&
(buffptr = strchr(buffptr, '#')) != NULL) {
strncpy(uch, buffptr + 1, kBoxReadBufSize - 1);
uch[kBoxReadBufSize - 1] = '\0'; // Prevent buffer overrun.
chomp_string(uch);
uch_len = strlen(uch);
}
// Validate UTF8 by making unichars with it.
int used = 0;
while (used < uch_len) {
UNICHAR ch(uch + used, uch_len - used);
int new_used = ch.utf8_len();
if (new_used == 0) {
tprintf("Bad UTF-8 str %s starts with 0x%02x at col %d\n",
uch + used, uch[used], used + 1);
return false;
}
used += new_used;
}
*utf8_str = uch;
if (x_min > x_max) Swap(&x_min, &x_max);
if (y_min > y_max) Swap(&y_min, &y_max);
bounding_box->set_to_given_coords(x_min, y_min, x_max, y_max);
return true; // Successfully read a box.
}
// Creates a box file string from a unichar string, TBOX and page number.
void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num,
STRING* box_str) {
*box_str = unichar_str;
box_str->add_str_int(" ", box.left());
box_str->add_str_int(" ", box.bottom());
box_str->add_str_int(" ", box.right());
box_str->add_str_int(" ", box.top());
box_str->add_str_int(" ", page_num);
}
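// Illustrative usage sketch, not part of the original source: a box file
// line has the form "<UTF8 str> <left> <bottom> <right> <top> <page>",
// e.g. "a 10 20 30 40 0". The function below (our own name, unused and for
// illustration only) round-trips one box through MakeBoxFileStr() and
// ParseBoxFileStr() above and returns true if the values survive intact.
static bool BoxFileStrRoundTripExample() {
  TBOX box(10, 20, 30, 40);            // left, bottom, right, top
  STRING line;
  MakeBoxFileStr("a", box, 0, &line);  // -> "a 10 20 30 40 0"
  int page = -1;
  STRING label;
  TBOX parsed;
  return ParseBoxFileStr(line.string(), &page, &label, &parsed) &&
         page == 0 && strcmp(label.string(), "a") == 0 &&
         parsed.left() == 10 && parsed.bottom() == 20 &&
         parsed.right() == 30 && parsed.top() == 40;
}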

View File

@ -0,0 +1,85 @@
/**********************************************************************
* File: boxread.h
* Description: Read data from a box file.
* Author: Ray Smith
* Created: Fri Aug 24 17:47:23 PDT 2007
*
* (C) Copyright 2007, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSERACT_CCUTIL_BOXREAD_H__
#define TESSERACT_CCUTIL_BOXREAD_H__
#include <stdio.h>
#include "genericvector.h"
#include "strngs.h"
class STRING;
class TBOX;
// Size of buffer used to read a line from a box file.
const int kBoxReadBufSize = 1024;
// Open the boxfile based on the given image filename.
// Returns NULL if the box file cannot be opened.
FILE* OpenBoxFile(const STRING& fname);
// Reads all boxes from the given filename.
// Reads a specific target_page number if >= 0, or all pages otherwise.
// Skips blanks if skip_blanks is true.
// The UTF-8 label of the box is put in texts, and the full box definition as
// a string is put in box_texts, with the corresponding page number in pages.
// Each of the output vectors is optional (may be NULL).
// Returns false if no boxes are found.
bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename,
GenericVector<TBOX>* boxes,
GenericVector<STRING>* texts,
GenericVector<STRING>* box_texts,
GenericVector<int>* pages);
// Reads all boxes from the string. Otherwise, as ReadAllBoxes.
bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data,
GenericVector<TBOX>* boxes,
GenericVector<STRING>* texts,
GenericVector<STRING>* box_texts,
GenericVector<int>* pages);
// Returns the box file name corresponding to the given image_filename.
STRING BoxFileName(const STRING& image_filename);
// ReadNextBox factors out the code to interpret a line of a box
// file so that applybox and unicharset_extractor interpret the same way.
// This function returns the next valid box file utf8 string and coords
// and returns true, or false on eof (and closes the file).
// It ignores the utf8 file signature ByteOrderMark (U+FEFF=EF BB BF), checks
// for valid utf-8 and allows space or tab between fields.
// utf8_str is set with the unichar string, and bounding box with the box.
// If there are page numbers in the file, it reads them all.
bool ReadNextBox(int *line_number, FILE* box_file,
STRING* utf8_str, TBOX* bounding_box);
// As ReadNextBox above, but get a specific page number. (0-based)
// Use -1 to read any page number. Files without page number all
// read as if they are page 0.
bool ReadNextBox(int target_page, int *line_number, FILE* box_file,
STRING* utf8_str, TBOX* bounding_box);
// Parses the given box file string into a page_number, utf8_str, and
// bounding_box. Returns true on a successful parse.
bool ParseBoxFileStr(const char* boxfile_str, int* page_number,
STRING* utf8_str, TBOX* bounding_box);
// Creates a box file string from a unichar string, TBOX and page number.
void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num,
STRING* box_str);
#endif // TESSERACT_CCUTIL_BOXREAD_H__

View File

@ -0,0 +1,203 @@
///////////////////////////////////////////////////////////////////////
// File: boxword.cpp
// Description: Class to represent the bounding boxes of the output.
// Author: Ray Smith
// Created: Tue May 25 14:18:14 PDT 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "blobs.h"
#include "boxword.h"
#include "normalis.h"
#include "ocrblock.h"
#include "pageres.h"
namespace tesseract {
// Clip output boxes to input blob boxes for bounds that are within this
// tolerance. Otherwise, the blob may be chopped and we have to just use
// the word bounding box.
const int kBoxClipTolerance = 2;
BoxWord::BoxWord() : length_(0) {
}
BoxWord::BoxWord(const BoxWord& src) {
CopyFrom(src);
}
BoxWord::~BoxWord() {
}
BoxWord& BoxWord::operator=(const BoxWord& src) {
CopyFrom(src);
return *this;
}
void BoxWord::CopyFrom(const BoxWord& src) {
bbox_ = src.bbox_;
length_ = src.length_;
boxes_.clear();
boxes_.reserve(length_);
for (int i = 0; i < length_; ++i)
boxes_.push_back(src.boxes_[i]);
}
// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
// switch back to original image coordinates.
BoxWord* BoxWord::CopyFromNormalized(TWERD* tessword) {
BoxWord* boxword = new BoxWord();
// Count the blobs.
boxword->length_ = tessword->NumBlobs();
// Allocate memory.
boxword->boxes_.reserve(boxword->length_);
for (int b = 0; b < boxword->length_; ++b) {
TBLOB* tblob = tessword->blobs[b];
TBOX blob_box;
for (TESSLINE* outline = tblob->outlines; outline != NULL;
outline = outline->next) {
EDGEPT* edgept = outline->loop;
// Iterate over the edges.
do {
if (!edgept->IsHidden() || !edgept->prev->IsHidden()) {
ICOORD pos(edgept->pos.x, edgept->pos.y);
TPOINT denormed;
tblob->denorm().DenormTransform(NULL, edgept->pos, &denormed);
pos.set_x(denormed.x);
pos.set_y(denormed.y);
TBOX pt_box(pos, pos);
blob_box += pt_box;
}
edgept = edgept->next;
} while (edgept != outline->loop);
}
boxword->boxes_.push_back(blob_box);
}
boxword->ComputeBoundingBox();
return boxword;
}
// Clean up the bounding boxes from the polygonal approximation by
// expanding slightly, then clipping to the blobs from the original_word
// that overlap. If not null, the block provides the inverse rotation.
void BoxWord::ClipToOriginalWord(const BLOCK* block, WERD* original_word) {
for (int i = 0; i < length_; ++i) {
TBOX box = boxes_[i];
// Expand by a single pixel, as the poly approximation error is 1 pixel.
box = TBOX(box.left() - 1, box.bottom() - 1,
box.right() + 1, box.top() + 1);
// Now find the original box that matches.
TBOX original_box;
C_BLOB_IT b_it(original_word->cblob_list());
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
TBOX blob_box = b_it.data()->bounding_box();
if (block != NULL)
blob_box.rotate(block->re_rotation());
if (blob_box.major_overlap(box)) {
original_box += blob_box;
}
}
if (!original_box.null_box()) {
if (NearlyEqual<int>(original_box.left(), box.left(), kBoxClipTolerance))
box.set_left(original_box.left());
if (NearlyEqual<int>(original_box.right(), box.right(),
kBoxClipTolerance))
box.set_right(original_box.right());
if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance))
box.set_top(original_box.top());
if (NearlyEqual<int>(original_box.bottom(), box.bottom(),
kBoxClipTolerance))
box.set_bottom(original_box.bottom());
}
original_box = original_word->bounding_box();
if (block != NULL)
original_box.rotate(block->re_rotation());
boxes_[i] = box.intersection(original_box);
}
ComputeBoundingBox();
}
// Merges the boxes from start to end, not including end, and deletes
// the boxes between start and end.
void BoxWord::MergeBoxes(int start, int end) {
start = ClipToRange(start, 0, length_);
end = ClipToRange(end, 0, length_);
if (end <= start + 1)
return;
for (int i = start + 1; i < end; ++i) {
boxes_[start] += boxes_[i];
}
int shrinkage = end - 1 - start;
length_ -= shrinkage;
for (int i = start + 1; i < length_; ++i)
boxes_[i] = boxes_[i + shrinkage];
boxes_.truncate(length_);
}
// Inserts a new box before the given index.
// Recomputes the bounding box.
void BoxWord::InsertBox(int index, const TBOX& box) {
if (index < length_)
boxes_.insert(box, index);
else
boxes_.push_back(box);
length_ = boxes_.size();
ComputeBoundingBox();
}
// Changes the box at the given index to the new box.
// Recomputes the bounding box.
void BoxWord::ChangeBox(int index, const TBOX& box) {
boxes_[index] = box;
ComputeBoundingBox();
}
// Deletes the box with the given index, and shuffles up the rest.
// Recomputes the bounding box.
void BoxWord::DeleteBox(int index) {
ASSERT_HOST(0 <= index && index < length_);
boxes_.remove(index);
--length_;
ComputeBoundingBox();
}
// Deletes all the boxes stored in BoxWord.
void BoxWord::DeleteAllBoxes() {
length_ = 0;
boxes_.clear();
bbox_ = TBOX();
}
// Computes the bounding box of the word.
void BoxWord::ComputeBoundingBox() {
bbox_ = TBOX();
for (int i = 0; i < length_; ++i)
bbox_ += boxes_[i];
}
// This and other are putatively the same word, so call the (permanent)
// callback
// for each blob index where the bounding boxes match.
// The callback is deleted on completion.
void BoxWord::ProcessMatchedBlobs(const TWERD& other,
TessCallback1<int>* cb) const {
for (int i = 0; i < length_ && i < other.NumBlobs(); ++i) {
TBOX blob_box = other.blobs[i]->bounding_box();
if (blob_box == boxes_[i])
cb->Run(i);
}
delete cb;
}
} // namespace tesseract.
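// Illustrative sketch, not part of the original source: ClipToOriginalWord()
// above snaps each edge of the polygonal-approximation box back to the
// matching original blob box edge when the two coordinates are within
// kBoxClipTolerance (2 pixels) of each other. The helper below uses our own
// name and shows that snapping rule for a single coordinate.
static inline int SnapToOriginalEdge(int approx, int original, int tolerance) {
  int diff = approx - original;
  if (diff < 0) diff = -diff;
  return diff <= tolerance ? original : approx;
}
// Example: with tolerance 2, SnapToOriginalEdge(101, 100, 2) == 100, but
// SnapToOriginalEdge(104, 100, 2) leaves 104 unchanged.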

View File

@ -0,0 +1,101 @@
///////////////////////////////////////////////////////////////////////
// File: boxword.h
// Description: Class to represent the bounding boxes of the output.
// Author: Ray Smith
// Created: Tue May 25 14:18:14 PDT 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CSTRUCT_BOXWORD_H__
#define TESSERACT_CSTRUCT_BOXWORD_H__
#include "genericvector.h"
#include "rect.h"
#include "unichar.h"
class BLOCK;
class DENORM;
struct TWERD;
class UNICHARSET;
class WERD;
class WERD_CHOICE;
class WERD_RES;
namespace tesseract {
// Class to hold an array of bounding boxes for an output word and
// the bounding box of the whole word.
class BoxWord {
public:
BoxWord();
explicit BoxWord(const BoxWord& src);
~BoxWord();
BoxWord& operator=(const BoxWord& src);
void CopyFrom(const BoxWord& src);
// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
// switch back to original image coordinates.
static BoxWord* CopyFromNormalized(TWERD* tessword);
// Clean up the bounding boxes from the polygonal approximation by
// expanding slightly, then clipping to the blobs from the original_word
// that overlap. If not null, the block provides the inverse rotation.
void ClipToOriginalWord(const BLOCK* block, WERD* original_word);
// Merges the boxes from start to end, not including end, and deletes
// the boxes between start and end.
void MergeBoxes(int start, int end);
// Inserts a new box before the given index.
// Recomputes the bounding box.
void InsertBox(int index, const TBOX& box);
// Changes the box at the given index to the new box.
// Recomputes the bounding box.
void ChangeBox(int index, const TBOX& box);
// Deletes the box with the given index, and shuffles up the rest.
// Recomputes the bounding box.
void DeleteBox(int index);
// Deletes all the boxes stored in BoxWord.
void DeleteAllBoxes();
// This and other are putatively the same word, so call the (permanent)
// callback
// for each blob index where the bounding boxes match.
// The callback is deleted on completion.
void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;
const TBOX& bounding_box() const {
return bbox_;
}
int length() const { return length_; }
const TBOX& BlobBox(int index) const {
return boxes_[index];
}
private:
void ComputeBoundingBox();
TBOX bbox_;
int length_;
GenericVector<TBOX> boxes_;
};
} // namespace tesseract.
#endif // TESSERACT_CSTRUCT_BOXWORD_H__

View File

@ -0,0 +1,36 @@
///////////////////////////////////////////////////////////////////////
// File: ccstruct.cpp
// Description: ccstruct class.
// Author: Samuel Charron
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "ccstruct.h"
namespace tesseract {
// APPROXIMATIONS of the fractions of the character cell taken by
// the descenders, ascenders, and x-height.
const double CCStruct::kDescenderFraction = 0.25;
const double CCStruct::kXHeightFraction = 0.5;
const double CCStruct::kAscenderFraction = 0.25;
const double CCStruct::kXHeightCapRatio = CCStruct::kXHeightFraction /
(CCStruct::kXHeightFraction + CCStruct::kAscenderFraction);
CCStruct::CCStruct() {}
CCStruct::~CCStruct() {
}
}
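// Worked example (not part of the original source): with the fractions
// above, the derived ratio is
//   kXHeightCapRatio = 0.5 / (0.5 + 0.25) = 2 / 3,
// i.e. the x-height is modelled as two thirds of the cap height, where the
// cap height is taken to be x-height plus ascender.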

View File

@ -0,0 +1,44 @@
///////////////////////////////////////////////////////////////////////
// File: ccstruct.h
// Description: ccstruct class.
// Author: Samuel Charron
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_CCSTRUCT_H__
#define TESSERACT_CCSTRUCT_CCSTRUCT_H__
#include "cutil.h"
namespace tesseract {
class CCStruct : public CUtil {
public:
CCStruct();
~CCStruct();
// Globally accessible constants.
// APPROXIMATIONS of the fractions of the character cell taken by
// the descenders, ascenders, and x-height.
static const double kDescenderFraction; // = 0.25;
static const double kXHeightFraction; // = 0.5;
static const double kAscenderFraction; // = 0.25;
// Derived value giving the x-height as a fraction of cap-height.
static const double kXHeightCapRatio; // = XHeight/(XHeight + Ascender).
};
class Tesseract;
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_CCSTRUCT_H__

File diff suppressed because it is too large

View File

@ -0,0 +1,291 @@
/**********************************************************************
* File: coutln.h (Formerly: coutline.h)
* Description: Code for the C_OUTLINE class.
* Author: Ray Smith
* Created: Mon Oct 07 16:01:57 BST 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef COUTLN_H
#define COUTLN_H
#include "crakedge.h"
#include "mod128.h"
#include "bits16.h"
#include "rect.h"
#include "blckerr.h"
#include "scrollview.h"
class DENORM;
#define INTERSECTING MAX_INT16//no winding number
//mask to get step
#define STEP_MASK 3
enum C_OUTLINE_FLAGS
{
COUT_INVERSE //White on black blob
};
// Simple struct to hold the 3 values needed to compute a more precise edge
// position and direction. The offset_numerator is the difference between the
// grey threshold and the mean pixel value. pixel_diff is the difference between
// the pixels in the edge. Consider the following row of pixels: p1 p2 p3 p4 p5
// Say the image was thresholded at threshold t, making p1, p2, p3 black
// and p4, p5 white (p1, p2, p3 < t, and p4, p5 >= t), but suppose that
// max(p[i+1] - p[i]) is p3 - p2. Then the extrapolated position of the edge,
// based on the maximum gradient, is at the crack between p2 and p3 plus the
// offset (t - (p2+p3)/2)/(p3 - p2). We store the pixel difference p3-p2
// denominator in pixel_diff and the offset numerator, relative to the original
// binary edge (t - (p2+p3)/2) - (p3 -p2) in offset_numerator.
// The sign of offset_numerator and pixel_diff are manipulated to ensure
// that the pixel_diff, which will be used as a weight, is always positive.
// The direction stores the quantized feature direction for the given step
// computed from the edge gradient. (Using binary_angle_plus_pi.)
// If the pixel_diff is zero, it means that the direction of the gradient
// is in conflict with the step direction, so this step is to be ignored.
struct EdgeOffset {
inT8 offset_numerator;
uinT8 pixel_diff;
uinT8 direction;
};
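// Illustrative sketch, not part of the original header: the comment above
// derives the sub-pixel edge offset (t - (p2 + p3) / 2) / (p3 - p2) relative
// to the crack between p2 and p3. The helper below uses our own name and
// evaluates that expression for one pair of neighbouring pixels, returning 0
// when the pixels are equal and the offset is undefined.
static inline float SubPixelEdgeOffsetExample(int p2, int p3, int threshold) {
  int diff = p3 - p2;
  if (diff == 0) return 0.0f;
  return (threshold - (p2 + p3) / 2.0f) / diff;
}
// Example: p2 = 40, p3 = 200 and threshold t = 100 give
// (100 - 120) / 160 = -0.125, i.e. the edge sits 1/8 of a pixel before the
// binary crack.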
class DLLSYM C_OUTLINE; //forward declaration
struct Pix;
ELISTIZEH (C_OUTLINE)
class DLLSYM C_OUTLINE:public ELIST_LINK {
public:
C_OUTLINE() { //empty constructor
steps = NULL;
offsets = NULL;
}
C_OUTLINE( //constructor
CRACKEDGE *startpt, //from edge detector
ICOORD bot_left, //bounding box
ICOORD top_right,
inT16 length); //length of loop
C_OUTLINE(ICOORD startpt, //start of loop
DIR128 *new_steps, //steps in loop
inT16 length); //length of loop
//outline to copy
C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation); //and rotate
// Build a fake outline, given just a bounding box and append to the list.
static void FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines);
~C_OUTLINE () { //destructor
if (steps != NULL)
free_mem(steps);
steps = NULL;
delete [] offsets;
}
BOOL8 flag( //test flag
C_OUTLINE_FLAGS mask) const { //flag to test
return flags.bit (mask);
}
void set_flag( //set flag value
C_OUTLINE_FLAGS mask, //flag to test
BOOL8 value) { //value to set
flags.set_bit (mask, value);
}
C_OUTLINE_LIST *child() { //get child list
return &children;
}
//access function
const TBOX &bounding_box() const {
return box;
}
void set_step( //set a step
inT16 stepindex, //index of step
inT8 stepdir) { //chain code
int shift = stepindex%4 * 2;
uinT8 mask = 3 << shift;
steps[stepindex/4] = ((stepdir << shift) & mask) |
(steps[stepindex/4] & ~mask);
//squeeze 4 into byte
}
void set_step( //set a step
inT16 stepindex, //index of step
DIR128 stepdir) { //direction
//clean it
inT8 chaindir = stepdir.get_dir() >> (DIRBITS - 2);
//difference
set_step(stepindex, chaindir);
//squeeze 4 into byte
}
inT32 pathlength() const { //get path length
return stepcount;
}
// Return step at a given index as a DIR128.
DIR128 step_dir(int index) const {
return DIR128((inT16)(((steps[index/4] >> (index%4 * 2)) & STEP_MASK) <<
(DIRBITS - 2)));
}
// Return the step vector for the given outline position.
ICOORD step(int index) const { // index of step
return step_coords[chain_code(index)];
}
// get start position
const ICOORD &start_pos() const {
return start;
}
// Returns the position at the given index on the outline.
// NOT to be used lightly, as it has to iterate the outline to find out.
ICOORD position_at_index(int index) const {
ICOORD pos = start;
for (int i = 0; i < index; ++i)
pos += step(i);
return pos;
}
// Returns the sub-pixel accurate position given the integer position pos
// at the given index on the outline. pos may be a return value of
// position_at_index, or computed by repeatedly adding step to the
// start_pos() in the usual way.
FCOORD sub_pixel_pos_at_index(const ICOORD& pos, int index) const {
const ICOORD& step_to_next(step(index));
FCOORD f_pos(pos.x() + step_to_next.x() / 2.0f,
pos.y() + step_to_next.y() / 2.0f);
if (offsets != NULL && offsets[index].pixel_diff > 0) {
float offset = offsets[index].offset_numerator;
offset /= offsets[index].pixel_diff;
if (step_to_next.x() != 0)
f_pos.set_y(f_pos.y() + offset);
else
f_pos.set_x(f_pos.x() + offset);
}
return f_pos;
}
// Returns the step direction for the given index or -1 if there is none.
int direction_at_index(int index) const {
if (offsets != NULL && offsets[index].pixel_diff > 0)
return offsets[index].direction;
return -1;
}
// Returns the edge strength for the given index.
// If there are no recorded edge strengths, returns 1 (assuming the image
// is binary). Returns 0 if the gradient direction conflicts with the
// step direction, indicating that this position could be skipped.
int edge_strength_at_index(int index) const {
if (offsets != NULL)
return offsets[index].pixel_diff;
return 1;
}
// Return the step as a chain code (0-3) related to the standard feature
// direction of binary_angle_plus_pi by:
// chain_code * 64 = feature direction.
int chain_code(int index) const { // index of step
return (steps[index / 4] >> (index % 4 * 2)) & STEP_MASK;
}
inT32 area() const; // Returns area of self and 1st level children.
inT32 perimeter() const; // Total perimeter of self and 1st level children.
inT32 outer_area() const; // Returns area of self only.
inT32 count_transitions( //count maxima
inT32 threshold); //size threshold
BOOL8 operator< ( //containment test
const C_OUTLINE & other) const;
BOOL8 operator> ( //containment test
C_OUTLINE & other) const
{
return other < *this; //use the < to do it
}
inT16 winding_number( //get winding number
ICOORD testpt) const; //around this point
//get direction
inT16 turn_direction() const;
void reverse(); //reverse direction
void move( // reposition outline
const ICOORD vec); // by vector
// Returns true if *this and its children are legally nested.
// The outer area of a child should have the opposite sign to the
// parent. If not, it means we have discarded an outline in between
// (probably due to excessive length).
bool IsLegallyNested() const;
// If this outline is smaller than the given min_size, delete this and
// remove from its list, via *it, after checking that *it points to this.
// Otherwise, if any children of this are too small, delete them.
// On entry, *it must be an iterator pointing to this. If this gets deleted
// then this is extracted from *it, so an iteration can continue.
void RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it);
// Adds sub-pixel resolution EdgeOffsets for the outline if the supplied
// pix is 8-bit. Does nothing otherwise.
void ComputeEdgeOffsets(int threshold, Pix* pix);
// Adds sub-pixel resolution EdgeOffsets for the outline using only
// a binary image source.
void ComputeBinaryOffsets();
// Renders the outline to the given pix, with left and top being
// the coords of the upper-left corner of the pix.
void render(int left, int top, Pix* pix) const;
// Renders just the outline to the given pix (no fill), with left and top
// being the coords of the upper-left corner of the pix.
void render_outline(int left, int top, Pix* pix) const;
#ifndef GRAPHICS_DISABLED
void plot( //draw one
ScrollView* window, //window to draw in
ScrollView::Color colour) const; //colour to draw it
// Draws the outline in the given colour, normalized using the given denorm,
// making use of sub-pixel accurate information if available.
void plot_normed(const DENORM& denorm, ScrollView::Color colour,
ScrollView* window) const;
#endif // GRAPHICS_DISABLED
C_OUTLINE& operator=(const C_OUTLINE& source);
static C_OUTLINE* deep_copy(const C_OUTLINE* src) {
C_OUTLINE* outline = new C_OUTLINE;
*outline = *src;
return outline;
}
static ICOORD chain_step(int chaindir);
// The maximum length of any outline. The stepcount is stored as 16 bits,
// but it is probably not a good idea to increase this constant by much
// and switch to 32 bits, as it plays an important role in keeping huge
// outlines invisible, which prevents bad speed behavior.
static const int kMaxOutlineLength = 16000;
private:
// Helper for ComputeBinaryOffsets. At step s mod stepcount, increments pos
// by the step, dir_counts by increment, and pos_totals by increment times
// the x position if the step is vertical (else the y position). Used to add
// or subtract the direction and position to/from accumulators of a small
// neighbourhood.
void increment_step(int s, int increment, ICOORD* pos, int* dir_counts,
int* pos_totals) const;
int step_mem() const { return (stepcount+3) / 4; }
TBOX box; // bounding box
ICOORD start; // start coord
inT16 stepcount; // no of steps
BITS16 flags; // flags about outline
uinT8 *steps; // step array
EdgeOffset* offsets; // Higher precision edge.
C_OUTLINE_LIST children; // child elements
static ICOORD step_coords[4];
};
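// Worked example of the step packing above (illustrative only): set_step and
// chain_code keep four 2-bit chain codes per byte, so storing the codes
// 0, 1, 2, 3 at indices 0..3 gives steps[0] == 0xE4 (binary 11 10 01 00,
// index 3 in the top bits), and chain_code(2) == (0xE4 >> 4) & 3 == 2.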
#endif

View File

@ -0,0 +1,37 @@
/**********************************************************************
* File: crakedge.h (Formerly: crkedge.h)
* Description: Structures for the Crack following edge detector.
* Author: Ray Smith
* Created: Fri Mar 22 16:06:38 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef CRAKEDGE_H
#define CRAKEDGE_H
#include "points.h"
#include "mod128.h"
class CRACKEDGE {
public:
CRACKEDGE() {}
ICOORD pos; /*position of crack */
inT8 stepx; //edge step
inT8 stepy;
inT8 stepdir; //chaincode
CRACKEDGE *prev; /*previous point */
CRACKEDGE *next; /*next point */
};
#endif

View File

@ -0,0 +1,295 @@
///////////////////////////////////////////////////////////////////////
// File: detlinefit.cpp
// Description: Deterministic least median squares line fitting.
// Author: Ray Smith
// Created: Thu Feb 28 14:45:01 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "detlinefit.h"
#include "statistc.h"
#include "ndminx.h"
#include "tprintf.h"
namespace tesseract {
// The number of points to consider at each end.
const int kNumEndPoints = 3;
// The minimum number of points at which to switch to number of points
// for badly fitted lines.
// To ensure a sensible error metric, kMinPointsForErrorCount should be at
// least kMaxRealDistance / (1 - %ile) where %ile is the fractile used in
// ComputeUpperQuartileError.
const int kMinPointsForErrorCount = 16;
// The maximum real distance to use before switching to number of
// mis-fitted points, which will get square-rooted for true distance.
const int kMaxRealDistance = 2.0;
DetLineFit::DetLineFit() : square_length_(0.0) {
}
DetLineFit::~DetLineFit() {
}
// Delete all Added points.
void DetLineFit::Clear() {
pts_.clear();
distances_.clear();
}
// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
void DetLineFit::Add(const ICOORD& pt) {
pts_.push_back(PointWidth(pt, 0));
}
// Associates a half-width with the given point. If a point overlaps the
// previous point by more than half the width, and its distance is further
// than the previous point, then the more distant point is ignored in the
// distance calculation. Useful for ignoring i dots and other diacritics.
void DetLineFit::Add(const ICOORD& pt, int halfwidth) {
pts_.push_back(PointWidth(pt, halfwidth));
}
// Fits a line to the points, ignoring the skip_first initial points and the
// skip_last final points, returning the fitted line as a pair of points,
// and the upper quartile error.
double DetLineFit::Fit(int skip_first, int skip_last,
ICOORD* pt1, ICOORD* pt2) {
// Do something sensible with no points.
if (pts_.empty()) {
pt1->set_x(0);
pt1->set_y(0);
*pt2 = *pt1;
return 0.0;
}
// Count the points and find the first and last kNumEndPoints.
int pt_count = pts_.size();
ICOORD* starts[kNumEndPoints];
if (skip_first >= pt_count) skip_first = pt_count - 1;
int start_count = 0;
int end_i = MIN(skip_first + kNumEndPoints, pt_count);
for (int i = skip_first; i < end_i; ++i) {
starts[start_count++] = &pts_[i].pt;
}
ICOORD* ends[kNumEndPoints];
if (skip_last >= pt_count) skip_last = pt_count - 1;
int end_count = 0;
end_i = MAX(0, pt_count - kNumEndPoints - skip_last);
for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
ends[end_count++] = &pts_[i].pt;
}
// 1 or 2 points need special treatment.
if (pt_count <= 2) {
*pt1 = *starts[0];
if (pt_count > 1)
*pt2 = *ends[0];
else
*pt2 = *pt1;
return 0.0;
}
// Although with between 2 and 2*kNumEndPoints-1 points, there will be
// overlap in the starts, ends sets, this is OK and taken care of by the
// if (*start != *end) test below, which also tests for equal input points.
double best_uq = -1.0;
// Iterate each pair of points and find the best fitting line.
for (int i = 0; i < start_count; ++i) {
ICOORD* start = starts[i];
for (int j = 0; j < end_count; ++j) {
ICOORD* end = ends[j];
if (*start != *end) {
ComputeDistances(*start, *end);
// Compute the upper quartile error from the line.
double dist = EvaluateLineFit();
if (dist < best_uq || best_uq < 0.0) {
best_uq = dist;
*pt1 = *start;
*pt2 = *end;
}
}
}
}
// Finally compute the square root to return the true distance.
return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
}
// Constrained fit with a supplied direction vector. Finds the best line_pt,
// that is one of the supplied points having the median cross product with
// direction, ignoring points that have a cross product outside of the range
// [min_dist, max_dist]. Returns the resulting error metric using the same
// reduced set of points.
// *Makes use of floating point arithmetic*
double DetLineFit::ConstrainedFit(const FCOORD& direction,
double min_dist, double max_dist,
bool debug, ICOORD* line_pt) {
ComputeConstrainedDistances(direction, min_dist, max_dist);
// Do something sensible with no points or computed distances.
if (pts_.empty() || distances_.empty()) {
line_pt->set_x(0);
line_pt->set_y(0);
return 0.0;
}
int median_index = distances_.choose_nth_item(distances_.size() / 2);
*line_pt = distances_[median_index].data;
if (debug) {
tprintf("Constrained fit to dir %g, %g = %d, %d :%d distances:\n",
direction.x(), direction.y(),
line_pt->x(), line_pt->y(), distances_.size());
for (int i = 0; i < distances_.size(); ++i) {
tprintf("%d: %d, %d -> %g\n", i, distances_[i].data.x(),
distances_[i].data.y(), distances_[i].key);
}
tprintf("Result = %d\n", median_index);
}
// Center distances on the fitted point.
double dist_origin = direction * *line_pt;
for (int i = 0; i < distances_.size(); ++i) {
distances_[i].key -= dist_origin;
}
return sqrt(EvaluateLineFit());
}
// Returns true if there were enough points at the last call to Fit or
// ConstrainedFit for the fitted points to be used on a badly fitted line.
bool DetLineFit::SufficientPointsForIndependentFit() const {
return distances_.size() >= kMinPointsForErrorCount;
}
// Backwards compatible fit returning a gradient and constant.
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
// function in preference to the LMS class.
double DetLineFit::Fit(float* m, float* c) {
ICOORD start, end;
double error = Fit(&start, &end);
if (end.x() != start.x()) {
*m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x());
*c = start.y() - *m * start.x();
} else {
*m = 0.0f;
*c = 0.0f;
}
return error;
}
// Backwards compatible constrained fit with a supplied gradient.
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
// to avoid potential difficulties with infinite gradients.
double DetLineFit::ConstrainedFit(double m, float* c) {
// Do something sensible with no points.
if (pts_.empty()) {
*c = 0.0f;
return 0.0;
}
double cos = 1.0 / sqrt(1.0 + m * m);
FCOORD direction(cos, m * cos);
ICOORD line_pt;
double error = ConstrainedFit(direction, -MAX_FLOAT32, MAX_FLOAT32, false,
&line_pt);
*c = line_pt.y() - line_pt.x() * m;
return error;
}
// Computes and returns the squared evaluation metric for a line fit.
double DetLineFit::EvaluateLineFit() {
// Compute the upper quartile error from the line.
double dist = ComputeUpperQuartileError();
if (distances_.size() >= kMinPointsForErrorCount &&
dist > kMaxRealDistance * kMaxRealDistance) {
// Use the number of mis-fitted points as the error metric, as this
// gives a better measure of fit for badly fitted lines where more
// than a quarter are badly fitted.
double threshold = kMaxRealDistance * sqrt(square_length_);
dist = NumberOfMisfittedPoints(threshold);
}
return dist;
}
// Computes the absolute error distances of the points from the line,
// and returns the squared upper-quartile error distance.
double DetLineFit::ComputeUpperQuartileError() {
int num_errors = distances_.size();
if (num_errors == 0) return 0.0;
// Get the absolute values of the errors.
for (int i = 0; i < num_errors; ++i) {
if (distances_[i].key < 0) distances_[i].key = -distances_[i].key;
}
// Now get the upper quartile distance.
int index = distances_.choose_nth_item(3 * num_errors / 4);
double dist = distances_[index].key;
// The true distance is the square root of the dist squared / square_length.
// Don't bother with the square root. Just return the square distance.
return square_length_ > 0.0 ? dist * dist / square_length_ : 0.0;
}
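// Illustrative note on the metric above: the stored keys are cross products,
// i.e. |line_vector||pt_vector|sin(angle), so a point's true perpendicular
// distance is key / |line_vector| and its square is key^2 / square_length_.
// For example, for a line vector (3, 4) (square_length_ = 25) and a key of
// 10, the perpendicular distance is 2 and the returned square is 100/25 = 4.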
// Returns the number of sample points that have an error more than threshold.
int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
int num_misfits = 0;
int num_dists = distances_.size();
// Get the absolute values of the errors.
for (int i = 0; i < num_dists; ++i) {
if (distances_[i].key > threshold)
++num_misfits;
}
return num_misfits;
}
// Computes all the cross product distances of the points from the line,
// storing the actual (signed) cross products in distances_.
// Ignores distances of points that are further away than the previous point
// and overlap the previous point by at least half.
void DetLineFit::ComputeDistances(const ICOORD& start, const ICOORD& end) {
distances_.truncate(0);
ICOORD line_vector = end;
line_vector -= start;
square_length_ = line_vector.sqlength();
int line_length = IntCastRounded(sqrt(square_length_));
// Compute the distance of each point from the line.
int prev_abs_dist = 0;
int prev_dot = 0;
for (int i = 0; i < pts_.size(); ++i) {
ICOORD pt_vector = pts_[i].pt;
pt_vector -= start;
int dot = line_vector % pt_vector;
// Compute |line_vector||pt_vector|sin(angle between)
int dist = line_vector * pt_vector;
int abs_dist = dist < 0 ? -dist : dist;
if (abs_dist > prev_abs_dist && i > 0) {
// Ignore this point if it overlaps the previous one.
int separation = abs(dot - prev_dot);
if (separation < line_length * pts_[i].halfwidth ||
separation < line_length * pts_[i - 1].halfwidth)
continue;
}
distances_.push_back(DistPointPair(dist, pts_[i].pt));
prev_abs_dist = abs_dist;
prev_dot = dot;
}
}
// Computes all the cross product distances of the points perpendicular to
// the given direction, ignoring distances outside of the given distance range,
// storing the actual (signed) cross products in distances_.
void DetLineFit::ComputeConstrainedDistances(const FCOORD& direction,
double min_dist, double max_dist) {
distances_.truncate(0);
square_length_ = direction.sqlength();
// Compute the distance of each point from the line.
for (int i = 0; i < pts_.size(); ++i) {
FCOORD pt_vector = pts_[i].pt;
// Compute |line_vector||pt_vector|sin(angle between)
double dist = direction * pt_vector;
if (min_dist <= dist && dist <= max_dist)
distances_.push_back(DistPointPair(dist, pts_[i].pt));
}
}
} // namespace tesseract.

View File

@ -0,0 +1,164 @@
///////////////////////////////////////////////////////////////////////
// File: detlinefit.h
// Description: Deterministic least upper-quartile squares line fitting.
// Author: Ray Smith
// Created: Thu Feb 28 14:35:01 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
#define TESSERACT_CCSTRUCT_DETLINEFIT_H_
#include "genericvector.h"
#include "kdpair.h"
#include "points.h"
namespace tesseract {
// This class fits a line to a set of ICOORD points.
// There is no restriction on the direction of the line, as it
// uses a vector method, ie no concern over infinite gradients.
// The fitted line has the least upper quartile of squares of perpendicular
// distances of all source points from the line, subject to the constraint
// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
// i.e. the 9 combinations of one of the first 3 and last 3 points.
// A fundamental assumption of this algorithm is that one of the first 3 and
// one of the last 3 points are near the best line fit.
// The points must be Added in line order for the algorithm to work properly.
// No floating point calculations are needed* to make an accurate fit,
// and no random numbers are needed** so the algorithm is deterministic,
// architecture-stable, and compiler-stable as well as stable to minor
// changes in the input.
// *A single floating point division is used to compute each line's distance.
// This is unlikely to result in choice of a different line, but if it does,
// it would be easy to replace with a 64 bit integer calculation.
// **Random numbers are used in the nth_item function, but the worst
// non-determinism that can result is picking a different result among equals,
// and that wouldn't make any difference to the end-result distance, so the
// randomness does not affect the determinism of the algorithm. The random
// numbers are only there to guarantee average linear time.
// Fitting time is linear, but with a high constant, as it tries 9 different
// lines and computes the distance of all points each time.
// This class is aimed at replacing the LLSQ (linear least squares) and
// LMS (least median of squares) classes that are currently used for most
// of the line fitting in Tesseract.
class DetLineFit {
public:
DetLineFit();
~DetLineFit();
// Delete all Added points.
void Clear();
// Adds a new point. Takes a copy - the pt doesn't need to stay in scope.
// Add must be called on points in sequence along the line.
void Add(const ICOORD& pt);
// Associates a half-width with the given point. If a point overlaps the
// previous point by more than half the width, and its distance is further
// than the previous point, then the more distant point is ignored in the
// distance calculation. Useful for ignoring i dots and other diacritics.
void Add(const ICOORD& pt, int halfwidth);
// Fits a line to the points, returning the fitted line as a pair of
// points, and the upper quartile error.
double Fit(ICOORD* pt1, ICOORD* pt2) {
return Fit(0, 0, pt1, pt2);
}
// Fits a line to the points, ignoring the skip_first initial points and the
// skip_last final points, returning the fitted line as a pair of points,
// and the upper quartile error.
double Fit(int skip_first, int skip_last, ICOORD* pt1, ICOORD* pt2);
// Constrained fit with a supplied direction vector. Finds the best line_pt,
// that is one of the supplied points having the median cross product with
// direction, ignoring points that have a cross product outside of the range
// [min_dist, max_dist]. Returns the resulting error metric using the same
// reduced set of points.
// *Makes use of floating point arithmetic*
double ConstrainedFit(const FCOORD& direction,
double min_dist, double max_dist,
bool debug, ICOORD* line_pt);
// Returns true if there were enough points at the last call to Fit or
// ConstrainedFit for the fitted points to be used on a badly fitted line.
bool SufficientPointsForIndependentFit() const;
// Backwards compatible fit returning a gradient and constant.
// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
// function in preference to the LMS class.
double Fit(float* m, float* c);
// Backwards compatible constrained fit with a supplied gradient.
// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
// to avoid potential difficulties with infinite gradients.
double ConstrainedFit(double m, float* c);
private:
// Simple struct to hold an ICOORD point and a halfwidth representing half
// the "width" (supposedly approximately parallel to the direction of the
// line) of each point, such that distant points can be discarded when they
// overlap nearer points. (Think i dot and other diacritics or noise.)
struct PointWidth {
PointWidth() : pt(ICOORD(0, 0)), halfwidth(0) {}
PointWidth(const ICOORD& pt0, int halfwidth0)
: pt(pt0), halfwidth(halfwidth0) {}
ICOORD pt;
int halfwidth;
};
// Type holds the distance of each point from the fitted line and the point
// itself. Use of double allows integer distances from ICOORDs to be stored
// exactly, and also the floating point results from ConstrainedFit.
typedef KDPairInc<double, ICOORD> DistPointPair;
// Computes and returns the squared evaluation metric for a line fit.
double EvaluateLineFit();
// Computes the absolute values of the precomputed distances_,
// and returns the squared upper-quartile error distance.
double ComputeUpperQuartileError();
// Returns the number of sample points that have an error more than threshold.
int NumberOfMisfittedPoints(double threshold) const;
// Computes all the cross product distances of the points from the line,
// storing the actual (signed) cross products in distances_.
// Ignores distances of points that are further away than the previous point
// and overlap the previous point by at least half.
void ComputeDistances(const ICOORD& start, const ICOORD& end);
// Computes all the cross product distances of the points perpendicular to
// the given direction, ignoring distances outside of the given distance range,
// storing the actual (signed) cross products in distances_.
void ComputeConstrainedDistances(const FCOORD& direction,
double min_dist, double max_dist);
// Stores all the source points in the order they were given and their
// halfwidths, if any.
GenericVector<PointWidth> pts_;
// Stores the computed perpendicular distances of (some of) the pts_ from a
// given vector (assuming it goes through the origin, making it a line).
// Since the distances may be a subset of the input points, and get
// re-ordered by the nth_item function, the original point is stored
// alongside the distance.
GenericVector<DistPointPair> distances_; // Distances of points.
// The squared length of the vector used to compute distances_.
double square_length_;
};
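// Skeletal usage sketch (illustrative only; n and points are placeholders):
//   DetLineFit fitter;
//   for (int i = 0; i < n; ++i)
//     fitter.Add(points[i]);               // must be added in line order
//   ICOORD pt1, pt2;
//   double err = fitter.Fit(&pt1, &pt2);   // upper-quartile error of the fit
//   // Or, constrained to a fixed direction (here horizontal):
//   ICOORD line_pt;
//   fitter.ConstrainedFit(FCOORD(1.0f, 0.0f), -100.0, 100.0, false, &line_pt);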
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_

View File

@ -0,0 +1,98 @@
/**********************************************************************
* File: dppoint.cpp
* Description: Simple generic dynamic programming class.
* Author: Ray Smith
* Created: Wed Mar 25 19:08:01 PDT 2009
*
* (C) Copyright 2009, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "dppoint.h"
#include "tprintf.h"
namespace tesseract {
// Solve the dynamic programming problem for the given array of points, with
// the given size and cost function.
// Steps backwards are limited to being between min_step and max_step
// inclusive.
// The return value is the tail of the best path.
DPPoint* DPPoint::Solve(int min_step, int max_step, bool debug,
CostFunc cost_func, int size, DPPoint* points) {
if (size <= 0 || max_step < min_step || min_step >= size)
return NULL; // Degenerate, but not necessarily an error.
ASSERT_HOST(min_step > 0); // Infinite loop possible if this is not true.
if (debug)
tprintf("min = %d, max=%d\n",
min_step, max_step);
// Evaluate the total cost at each point.
for (int i = 0; i < size; ++i) {
for (int offset = min_step; offset <= max_step; ++offset) {
DPPoint* prev = offset <= i ? points + i - offset : NULL;
inT64 new_cost = (points[i].*cost_func)(prev);
if (points[i].best_prev_ != NULL && offset > min_step * 2 &&
new_cost > points[i].total_cost_)
break; // Find only the first minimum if going over twice the min.
}
points[i].total_cost_ += points[i].local_cost_;
if (debug) {
tprintf("At point %d, local cost=%d, total_cost=%d, steps=%d\n",
i, points[i].local_cost_, points[i].total_cost_,
points[i].total_steps_);
}
}
// Now find the end of the best path and return it.
int best_cost = points[size - 1].total_cost_;
int best_end = size - 1;
for (int end = best_end - 1; end >= size - min_step; --end) {
int cost = points[end].total_cost_;
if (cost < best_cost) {
best_cost = cost;
best_end = end;
}
}
return points + best_end;
}
// A CostFunc that takes the variance of step into account in the cost.
inT64 DPPoint::CostWithVariance(const DPPoint* prev) {
if (prev == NULL || prev == this) {
UpdateIfBetter(0, 1, NULL, 0, 0, 0);
return 0;
}
int delta = this - prev;
inT32 n = prev->n_ + 1;
inT32 sig_x = prev->sig_x_ + delta;
inT64 sig_xsq = prev->sig_xsq_ + delta * delta;
inT64 cost = (sig_xsq - sig_x * sig_x / n) / n;
cost += prev->total_cost_;
UpdateIfBetter(cost, prev->total_steps_ + 1, prev, n, sig_x, sig_xsq);
return cost;
}
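// Illustrative note: up to integer truncation, the cost expression above is
// the single-pass formula for the variance of the step sizes seen so far,
// e.g. for steps 2, 2, 2: n = 3, sig_x = 6, sig_xsq = 12, and
// (12 - 6 * 6 / 3) / 3 == 0, so perfectly regular spacing adds nothing
// beyond prev->total_cost_.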
// Update the other members if the cost is lower.
void DPPoint::UpdateIfBetter(inT64 cost, inT32 steps, const DPPoint* prev,
inT32 n, inT32 sig_x, inT64 sig_xsq) {
if (cost < total_cost_) {
total_cost_ = cost;
total_steps_ = steps;
best_prev_ = prev;
n_ = n;
sig_x_ = sig_x;
sig_xsq_ = sig_xsq;
}
}
} // namespace tesseract.

View File

@ -0,0 +1,102 @@
/**********************************************************************
* File: dppoint.h
* Description: Simple generic dynamic programming class.
* Author: Ray Smith
* Created: Wed Mar 25 18:57:01 PDT 2009
*
* (C) Copyright 2009, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESSERACT_CCSTRUCT_DPPOINT_H__
#define TESSERACT_CCSTRUCT_DPPOINT_H__
#include "host.h"
namespace tesseract {
// A simple class to provide a dynamic programming solution to a class of
// 1st-order problems in which the cost is dependent only on the current
// step and the best cost to that step, with a possible special case
// of using the variance of the steps, and only the top choice is required.
// Useful for problems such as finding the optimal cut points in a fixed-pitch
// (vertical or horizontal) situation.
// Skeletal Example:
// DPPoint* array = new DPPoint[width];
// for (int i = 0; i < width; i++) {
// array[i].AddLocalCost(cost_at_i)
// }
// DPPoint* best_end = DPPoint::Solve(..., array);
// while (best_end != NULL) {
// int cut_index = best_end - array;
// best_end = best_end->best_prev();
// }
// delete [] array;
class DPPoint {
public:
// The cost function evaluates the total cost at this point (excluding this
// point's local_cost_) and, if it beats this point's total_cost_, replaces
// the appropriate values in this point.
typedef inT64 (DPPoint::*CostFunc)(const DPPoint* prev);
DPPoint()
: local_cost_(0), total_cost_(MAX_INT32), total_steps_(1), best_prev_(NULL),
n_(0), sig_x_(0), sig_xsq_(0) {
}
// Solve the dynamic programming problem for the given array of points, with
// the given size and cost function.
// Steps backwards are limited to being between min_step and max_step
// inclusive.
// The return value is the tail of the best path.
static DPPoint* Solve(int min_step, int max_step, bool debug,
CostFunc cost_func, int size, DPPoint* points);
// A CostFunc that takes the variance of step into account in the cost.
inT64 CostWithVariance(const DPPoint* prev);
// Accessors.
int total_cost() const {
return total_cost_;
}
int Pathlength() const {
return total_steps_;
}
const DPPoint* best_prev() const {
return best_prev_;
}
void AddLocalCost(int new_cost) {
local_cost_ += new_cost;
}
private:
// Code common to different cost functions.
// Update the other members if the cost is lower.
void UpdateIfBetter(inT64 cost, inT32 steps, const DPPoint* prev,
inT32 n, inT32 sig_x, inT64 sig_xsq);
inT32 local_cost_; // Cost of this point on its own.
inT32 total_cost_; // Sum of all costs in best path to here.
// During cost calculations local_cost is excluded.
inT32 total_steps_; // Number of steps in best path to here.
const DPPoint* best_prev_; // Pointer to prev point in best path from here.
// Information for computing the variance part of the cost.
inT32 n_; // Number of steps in best path to here for variance.
inT32 sig_x_; // Sum of step sizes for computing variance.
inT64 sig_xsq_; // Sum of squares of steps for computing variance.
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_DPPOINT_H__

View File

@ -0,0 +1,262 @@
///////////////////////////////////////////////////////////////////////
// File: fontinfo.cpp
// Description: Font information classes abstracted from intproto.h/cpp.
// Author: rays@google.com (Ray Smith)
// Created: Wed May 18 10:39:01 PDT 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "fontinfo.h"
#include "bitvector.h"
#include "unicity_table.h"
namespace tesseract {
// Writes to the given file. Returns false in case of error.
bool FontInfo::Serialize(FILE* fp) const {
if (!write_info(fp, *this)) return false;
if (!write_spacing_info(fp, *this)) return false;
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool FontInfo::DeSerialize(bool swap, FILE* fp) {
if (!read_info(fp, this, swap)) return false;
if (!read_spacing_info(fp, this, swap)) return false;
return true;
}
FontInfoTable::FontInfoTable() {
set_compare_callback(NewPermanentTessCallback(CompareFontInfo));
set_clear_callback(NewPermanentTessCallback(FontInfoDeleteCallback));
}
FontInfoTable::~FontInfoTable() {
}
// Writes to the given file. Returns false in case of error.
bool FontInfoTable::Serialize(FILE* fp) const {
return this->SerializeClasses(fp);
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool FontInfoTable::DeSerialize(bool swap, FILE* fp) {
truncate(0);
return this->DeSerializeClasses(swap, fp);
}
// Returns true if the given set of fonts includes one with the same
// properties as font_id.
bool FontInfoTable::SetContainsFontProperties(
int font_id, const GenericVector<ScoredFont>& font_set) const {
uinT32 properties = get(font_id).properties;
for (int f = 0; f < font_set.size(); ++f) {
if (get(font_set[f].fontinfo_id).properties == properties)
return true;
}
return false;
}
// Returns true if the given set of fonts includes multiple properties.
bool FontInfoTable::SetContainsMultipleFontProperties(
const GenericVector<ScoredFont>& font_set) const {
if (font_set.empty()) return false;
int first_font = font_set[0].fontinfo_id;
uinT32 properties = get(first_font).properties;
for (int f = 1; f < font_set.size(); ++f) {
if (get(font_set[f].fontinfo_id).properties != properties)
return true;
}
return false;
}
// Moves any non-empty FontSpacingInfo entries from other to this.
void FontInfoTable::MoveSpacingInfoFrom(FontInfoTable* other) {
set_compare_callback(NewPermanentTessCallback(CompareFontInfo));
set_clear_callback(NewPermanentTessCallback(FontInfoDeleteCallback));
for (int i = 0; i < other->size(); ++i) {
GenericVector<FontSpacingInfo*>* spacing_vec = other->get(i).spacing_vec;
if (spacing_vec != NULL) {
int target_index = get_index(other->get(i));
if (target_index < 0) {
// Bit copy the FontInfo and steal all the pointers.
push_back(other->get(i));
other->get(i).name = NULL;
} else {
delete get(target_index).spacing_vec;
get(target_index).spacing_vec = other->get(i).spacing_vec;
}
other->get(i).spacing_vec = NULL;
}
}
}
// Moves this to the target unicity table.
void FontInfoTable::MoveTo(UnicityTable<FontInfo>* target) {
target->clear();
target->set_compare_callback(NewPermanentTessCallback(CompareFontInfo));
target->set_clear_callback(NewPermanentTessCallback(FontInfoDeleteCallback));
for (int i = 0; i < size(); ++i) {
// Bit copy the FontInfo and steal all the pointers.
target->push_back(get(i));
get(i).name = NULL;
get(i).spacing_vec = NULL;
}
}
// Compare FontInfo structures.
bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2) {
// The font properties are required to be the same for two fonts with the same
// name, so there is no need to test them.
// Consequently, querying the table with only its font name as information is
// enough to retrieve its properties.
return strcmp(fi1.name, fi2.name) == 0;
}
// Compare FontSet structures.
bool CompareFontSet(const FontSet& fs1, const FontSet& fs2) {
if (fs1.size != fs2.size)
return false;
for (int i = 0; i < fs1.size; ++i) {
if (fs1.configs[i] != fs2.configs[i])
return false;
}
return true;
}
// Callbacks for GenericVector.
void FontInfoDeleteCallback(FontInfo f) {
if (f.spacing_vec != NULL) {
f.spacing_vec->delete_data_pointers();
delete f.spacing_vec;
}
delete[] f.name;
}
void FontSetDeleteCallback(FontSet fs) {
delete[] fs.configs;
}
/*---------------------------------------------------------------------------*/
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
bool read_info(FILE* f, FontInfo* fi, bool swap) {
inT32 size;
if (fread(&size, sizeof(size), 1, f) != 1) return false;
if (swap)
Reverse32(&size);
char* font_name = new char[size + 1];
fi->name = font_name;
if (static_cast<int>(fread(font_name, sizeof(*font_name), size, f)) != size)
return false;
font_name[size] = '\0';
if (fread(&fi->properties, sizeof(fi->properties), 1, f) != 1) return false;
if (swap)
Reverse32(&fi->properties);
return true;
}
bool write_info(FILE* f, const FontInfo& fi) {
inT32 size = strlen(fi.name);
if (fwrite(&size, sizeof(size), 1, f) != 1) return false;
if (static_cast<int>(fwrite(fi.name, sizeof(*fi.name), size, f)) != size)
return false;
if (fwrite(&fi.properties, sizeof(fi.properties), 1, f) != 1) return false;
return true;
}
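// Illustrative note: as read_info/write_info above imply, a FontInfo record
// is laid out as
//   inT32 name_length | name bytes (no terminator) | uinT32 properties
// followed by the optional spacing data handled by read_spacing_info and
// write_spacing_info below. The layout is not endian-neutral, hence the
// swap/Reverse32 handling on the read side.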
bool read_spacing_info(FILE *f, FontInfo* fi, bool swap) {
inT32 vec_size, kern_size;
if (fread(&vec_size, sizeof(vec_size), 1, f) != 1) return false;
if (swap) Reverse32(&vec_size);
ASSERT_HOST(vec_size >= 0);
if (vec_size == 0) return true;
fi->init_spacing(vec_size);
for (int i = 0; i < vec_size; ++i) {
FontSpacingInfo *fs = new FontSpacingInfo();
if (fread(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, f) != 1 ||
fread(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, f) != 1 ||
fread(&kern_size, sizeof(kern_size), 1, f) != 1) {
delete fs;
return false;
}
if (swap) {
ReverseN(&(fs->x_gap_before), sizeof(fs->x_gap_before));
ReverseN(&(fs->x_gap_after), sizeof(fs->x_gap_after));
Reverse32(&kern_size);
}
if (kern_size < 0) { // indication of a NULL entry in fi->spacing_vec
delete fs;
continue;
}
if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(swap, f) ||
!fs->kerned_x_gaps.DeSerialize(swap, f))) {
delete fs;
return false;
}
fi->add_spacing(i, fs);
}
return true;
}
bool write_spacing_info(FILE* f, const FontInfo& fi) {
inT32 vec_size = (fi.spacing_vec == NULL) ? 0 : fi.spacing_vec->size();
if (fwrite(&vec_size, sizeof(vec_size), 1, f) != 1) return false;
inT16 x_gap_invalid = -1;
for (int i = 0; i < vec_size; ++i) {
FontSpacingInfo *fs = fi.spacing_vec->get(i);
inT32 kern_size = (fs == NULL) ? -1 : fs->kerned_x_gaps.size();
if (fs == NULL) {
// Valid to have the identical fwrites. Writing invalid x-gaps.
if (fwrite(&(x_gap_invalid), sizeof(x_gap_invalid), 1, f) != 1 ||
fwrite(&(x_gap_invalid), sizeof(x_gap_invalid), 1, f) != 1 ||
fwrite(&kern_size, sizeof(kern_size), 1, f) != 1) {
return false;
}
} else {
if (fwrite(&(fs->x_gap_before), sizeof(fs->x_gap_before), 1, f) != 1 ||
fwrite(&(fs->x_gap_after), sizeof(fs->x_gap_after), 1, f) != 1 ||
fwrite(&kern_size, sizeof(kern_size), 1, f) != 1) {
return false;
}
}
if (kern_size > 0 && (!fs->kerned_unichar_ids.Serialize(f) ||
!fs->kerned_x_gaps.Serialize(f))) {
return false;
}
}
return true;
}
bool read_set(FILE* f, FontSet* fs, bool swap) {
if (fread(&fs->size, sizeof(fs->size), 1, f) != 1) return false;
if (swap)
Reverse32(&fs->size);
fs->configs = new int32_t[fs->size];
for (int i = 0; i < fs->size; ++i) {
if (fread(&fs->configs[i], sizeof(fs->configs[i]), 1, f) != 1) return false;
if (swap)
Reverse32(&fs->configs[i]);
}
return true;
}
bool write_set(FILE* f, const FontSet& fs) {
if (fwrite(&fs.size, sizeof(fs.size), 1, f) != 1) return false;
for (int i = 0; i < fs.size; ++i) {
if (fwrite(&fs.configs[i], sizeof(fs.configs[i]), 1, f) != 1) return false;
}
return true;
}
} // namespace tesseract.

View File

@ -0,0 +1,191 @@
///////////////////////////////////////////////////////////////////////
// File: fontinfo.h
// Description: Font information classes abstracted from intproto.h/cpp.
// Author: rays@google.com (Ray Smith)
// Created: Tue May 17 17:08:01 PDT 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_FONTINFO_H_
#define TESSERACT_CCSTRUCT_FONTINFO_H_
#include "genericvector.h"
#include "host.h"
#include "unichar.h"
#include <stdint.h>
template <typename T> class UnicityTable;
namespace tesseract {
class BitVector;
// Simple struct to hold a font and a score. The scores come from the low-level
// integer matcher, so they are in the uinT16 range. Fonts are an index to
// fontinfo_table.
// These get copied around a lot, so best to keep them small.
struct ScoredFont {
ScoredFont() : fontinfo_id(-1), score(0) {}
ScoredFont(int font_id, uinT16 classifier_score)
: fontinfo_id(font_id), score(classifier_score) {}
// Index into fontinfo table, but inside the classifier, may be a shapetable
// index.
inT32 fontinfo_id;
// Raw score from the low-level classifier.
uinT16 score;
};
// Struct for information about spacing between characters in a particular font.
struct FontSpacingInfo {
inT16 x_gap_before;
inT16 x_gap_after;
GenericVector<UNICHAR_ID> kerned_unichar_ids;
GenericVector<inT16> kerned_x_gaps;
};
/*
* font_properties contains properties about boldness, italicness, fixed pitch,
* serif, fraktur
*/
struct FontInfo {
FontInfo() : name(NULL), properties(0), universal_id(0), spacing_vec(NULL) {}
~FontInfo() {}
// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp);
// Reserves unicharset_size spots in spacing_vec.
void init_spacing(int unicharset_size) {
spacing_vec = new GenericVector<FontSpacingInfo *>();
spacing_vec->init_to_size(unicharset_size, NULL);
}
// Adds the given pointer to FontSpacingInfo to spacing_vec member
// (FontInfo class takes ownership of the pointer).
// Note: init_spacing should be called before calling this function.
void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) {
ASSERT_HOST(spacing_vec != NULL && spacing_vec->size() > uch_id);
(*spacing_vec)[uch_id] = spacing_info;
}
// Returns the pointer to FontSpacingInfo for the given UNICHAR_ID.
const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const {
return (spacing_vec == NULL || spacing_vec->size() <= uch_id) ?
NULL : (*spacing_vec)[uch_id];
}
// Fills spacing with the value of the x gap expected between the two given
// UNICHAR_IDs. Returns true on success.
bool get_spacing(UNICHAR_ID prev_uch_id,
UNICHAR_ID uch_id,
int *spacing) const {
const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id);
const FontSpacingInfo *fsi = this->get_spacing(uch_id);
if (prev_fsi == NULL || fsi == NULL) return false;
int i = 0;
for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) {
if (prev_fsi->kerned_unichar_ids[i] == uch_id) break;
}
if (i < prev_fsi->kerned_unichar_ids.size()) {
*spacing = prev_fsi->kerned_x_gaps[i];
} else {
*spacing = prev_fsi->x_gap_after + fsi->x_gap_before;
}
return true;
}
bool is_italic() const { return properties & 1; }
bool is_bold() const { return (properties & 2) != 0; }
bool is_fixed_pitch() const { return (properties & 4) != 0; }
bool is_serif() const { return (properties & 8) != 0; }
bool is_fraktur() const { return (properties & 16) != 0; }
char* name;
uinT32 properties;
// The universal_id is a field reserved for the initialization process
// to assign a unique id number to all fonts loaded for the current
// combination of languages. This id will then be returned by
// ResultIterator::WordFontAttributes.
inT32 universal_id;
// Horizontal spacing between characters (indexed by UNICHAR_ID).
GenericVector<FontSpacingInfo *> *spacing_vec;
};
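// Illustrative note: properties is the bit field decoded by the accessors
// above (bit 0 italic, bit 1 bold, bit 2 fixed pitch, bit 3 serif,
// bit 4 fraktur); e.g. properties == 5 (binary 00101) describes an italic,
// fixed-pitch, non-bold font.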
// Every class (character) owns a FontSet that represents all the fonts that can
// render this character.
// Since almost all the characters from the same script share the same set of
// fonts, the sets are shared over multiple classes (see
// Classify::fontset_table_). Thus, a class only stores an id to a set.
// Because some fonts cannot render just one character of a set, there are a
// lot of FontSets that differ only by one font. Rather than storing the
// FontInfo directly in the FontSet structure, it's better to share FontInfos
// among FontSets (Classify::fontinfo_table_).
struct FontSet {
int32_t size;
int32_t* configs; // FontInfo ids
};
// Class that adds a bit of functionality on top of GenericVector to
// implement a table of FontInfo that replaces UnicityTable<FontInfo>.
// TODO(rays) change all references once all existing traineddata files
// are replaced.
class FontInfoTable : public GenericVector<FontInfo> {
public:
FontInfoTable();
~FontInfoTable();
// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp);
// Returns true if the given set of fonts includes one with the same
// properties as font_id.
bool SetContainsFontProperties(
int font_id, const GenericVector<ScoredFont>& font_set) const;
// Returns true if the given set of fonts includes multiple properties.
bool SetContainsMultipleFontProperties(
const GenericVector<ScoredFont>& font_set) const;
// Moves any non-empty FontSpacingInfo entries from other to this.
void MoveSpacingInfoFrom(FontInfoTable* other);
// Moves this to the target unicity table.
void MoveTo(UnicityTable<FontInfo>* target);
};
// Compare FontInfo structures.
bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2);
// Compare FontSet structures.
bool CompareFontSet(const FontSet& fs1, const FontSet& fs2);
// Deletion callbacks for GenericVector.
void FontInfoDeleteCallback(FontInfo f);
void FontSetDeleteCallback(FontSet fs);
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
bool read_info(FILE* f, FontInfo* fi, bool swap);
bool write_info(FILE* f, const FontInfo& fi);
bool read_spacing_info(FILE *f, FontInfo* fi, bool swap);
bool write_spacing_info(FILE* f, const FontInfo& fi);
bool read_set(FILE* f, FontSet* fs, bool swap);
bool write_set(FILE* f, const FontSet& fs);
} // namespace tesseract.
#endif  // TESSERACT_CCSTRUCT_FONTINFO_H_

View File

@ -0,0 +1,38 @@
/**********************************************************************
* File: genblob.cpp (Formerly gblob.c)
* Description: Generic Blob processing routines
* Author: Phil Cheatle
* Created: Mon Nov 25 10:53:26 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "genblob.h"
#include "stepblob.h"
/**********************************************************************
* c_blob_comparator()
*
* Blob comparator used to sort a blob list so that blobs are in increasing
* order of left edge.
**********************************************************************/
int c_blob_comparator( // sort blobs
const void *blob1p, // ptr to ptr to blob1
const void *blob2p // ptr to ptr to blob2
) {
C_BLOB *blob1 = *(C_BLOB **) blob1p;
C_BLOB *blob2 = *(C_BLOB **) blob2p;
return blob1->bounding_box ().left () - blob2->bounding_box ().left ();
}

View File

@ -0,0 +1,27 @@
/**********************************************************************
* File: genblob.h (Formerly gblob.h)
* Description: Generic Blob processing routines
* Author: Phil Cheatle
* Created: Mon Nov 25 10:53:26 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef GENBLOB_H
#define GENBLOB_H
// Sort function to sort blobs by ascending left edge.
int c_blob_comparator(const void *blob1p, // ptr to ptr to blob1
const void *blob2p);
#endif

View File

@ -0,0 +1,17 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef HPDSIZES_H
#define HPDSIZES_H
#define NUM_TEXT_ATTR 10
#define NUM_BLOCK_ATTR 7
#define MAXLENGTH 128
#define NUM_BACKGROUNDS 8
#endif

View File

@ -0,0 +1,699 @@
///////////////////////////////////////////////////////////////////////
// File: imagedata.cpp
// Description: Class to hold information about a single multi-page tiff
// training file and its corresponding boxes or text file.
// Author: Ray Smith
// Created: Tue May 28 08:56:06 PST 2013
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#include "imagedata.h"
#include "allheaders.h"
#include "boxread.h"
#include "callcpp.h"
#include "helpers.h"
#include "tprintf.h"
#if defined(__MINGW32__)
# include <unistd.h>
#elif __cplusplus > 199711L // in C++11
# include <thread>
#endif
// Number of documents to read ahead while training. Doesn't need to be very
// large.
const int kMaxReadAhead = 8;
namespace tesseract {
WordFeature::WordFeature() : x_(0), y_(0), dir_(0) {
}
WordFeature::WordFeature(const FCOORD& fcoord, uinT8 dir)
: x_(IntCastRounded(fcoord.x())),
y_(ClipToRange(IntCastRounded(fcoord.y()), 0, MAX_UINT8)),
dir_(dir) {
}
// Computes the maximum x and y value in the features.
void WordFeature::ComputeSize(const GenericVector<WordFeature>& features,
int* max_x, int* max_y) {
*max_x = 0;
*max_y = 0;
for (int f = 0; f < features.size(); ++f) {
if (features[f].x_ > *max_x) *max_x = features[f].x_;
if (features[f].y_ > *max_y) *max_y = features[f].y_;
}
}
// Draws the features in the given window.
void WordFeature::Draw(const GenericVector<WordFeature>& features,
ScrollView* window) {
#ifndef GRAPHICS_DISABLED
for (int f = 0; f < features.size(); ++f) {
FCOORD pos(features[f].x_, features[f].y_);
FCOORD dir;
dir.from_direction(features[f].dir_);
dir *= 8.0f;
window->SetCursor(IntCastRounded(pos.x() - dir.x()),
IntCastRounded(pos.y() - dir.y()));
window->DrawTo(IntCastRounded(pos.x() + dir.x()),
IntCastRounded(pos.y() + dir.y()));
}
#endif
}
// Writes to the given file. Returns false in case of error.
bool WordFeature::Serialize(FILE* fp) const {
if (fwrite(&x_, sizeof(x_), 1, fp) != 1) return false;
if (fwrite(&y_, sizeof(y_), 1, fp) != 1) return false;
if (fwrite(&dir_, sizeof(dir_), 1, fp) != 1) return false;
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool WordFeature::DeSerialize(bool swap, FILE* fp) {
if (fread(&x_, sizeof(x_), 1, fp) != 1) return false;
if (swap) ReverseN(&x_, sizeof(x_));
if (fread(&y_, sizeof(y_), 1, fp) != 1) return false;
if (fread(&dir_, sizeof(dir_), 1, fp) != 1) return false;
return true;
}
void FloatWordFeature::FromWordFeatures(
const GenericVector<WordFeature>& word_features,
GenericVector<FloatWordFeature>* float_features) {
for (int i = 0; i < word_features.size(); ++i) {
FloatWordFeature f;
f.x = word_features[i].x();
f.y = word_features[i].y();
f.dir = word_features[i].dir();
f.x_bucket = 0; // Will set it later.
float_features->push_back(f);
}
}
// Sort function to sort first by x-bucket, then by y.
/* static */
int FloatWordFeature::SortByXBucket(const void* v1, const void* v2) {
const FloatWordFeature* f1 = reinterpret_cast<const FloatWordFeature*>(v1);
const FloatWordFeature* f2 = reinterpret_cast<const FloatWordFeature*>(v2);
int x_diff = f1->x_bucket - f2->x_bucket;
if (x_diff == 0) return f1->y - f2->y;
return x_diff;
}
ImageData::ImageData() : page_number_(-1), vertical_text_(false) {
}
// Takes ownership of the pix and destroys it.
ImageData::ImageData(bool vertical, Pix* pix)
: page_number_(0), vertical_text_(vertical) {
SetPix(pix);
}
ImageData::~ImageData() {
}
// Builds and returns an ImageData from the basic data. Note that imagedata,
// truth_text, and box_text are all the actual file data, NOT filenames.
ImageData* ImageData::Build(const char* name, int page_number, const char* lang,
const char* imagedata, int imagedatasize,
const char* truth_text, const char* box_text) {
ImageData* image_data = new ImageData();
image_data->imagefilename_ = name;
image_data->page_number_ = page_number;
image_data->language_ = lang;
// Save the imagedata.
image_data->image_data_.resize_no_init(imagedatasize);
memcpy(&image_data->image_data_[0], imagedata, imagedatasize);
if (!image_data->AddBoxes(box_text)) {
if (truth_text == NULL || truth_text[0] == '\0') {
tprintf("Error: No text corresponding to page %d from image %s!\n",
page_number, name);
delete image_data;
return NULL;
}
image_data->transcription_ = truth_text;
// If we have no boxes, the transcription is in the 0th box_texts_.
image_data->box_texts_.push_back(truth_text);
// We will create a box for the whole image on PreScale, to save unpacking
// the image now.
} else if (truth_text != NULL && truth_text[0] != '\0' &&
image_data->transcription_ != truth_text) {
// Save the truth text as it is present and disagrees with the box text.
image_data->transcription_ = truth_text;
}
return image_data;
}
// Writes to the given file. Returns false in case of error.
bool ImageData::Serialize(TFile* fp) const {
if (!imagefilename_.Serialize(fp)) return false;
if (fp->FWrite(&page_number_, sizeof(page_number_), 1) != 1) return false;
if (!image_data_.Serialize(fp)) return false;
if (!transcription_.Serialize(fp)) return false;
// WARNING: Will not work across different endian machines.
if (!boxes_.Serialize(fp)) return false;
if (!box_texts_.SerializeClasses(fp)) return false;
inT8 vertical = vertical_text_;
if (fp->FWrite(&vertical, sizeof(vertical), 1) != 1) return false;
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool ImageData::DeSerialize(bool swap, TFile* fp) {
if (!imagefilename_.DeSerialize(swap, fp)) return false;
if (fp->FRead(&page_number_, sizeof(page_number_), 1) != 1) return false;
if (swap) ReverseN(&page_number_, sizeof(page_number_));
if (!image_data_.DeSerialize(swap, fp)) return false;
if (!transcription_.DeSerialize(swap, fp)) return false;
// WARNING: Will not work across different endian machines.
if (!boxes_.DeSerialize(swap, fp)) return false;
if (!box_texts_.DeSerializeClasses(swap, fp)) return false;
inT8 vertical = 0;
if (fp->FRead(&vertical, sizeof(vertical), 1) != 1) return false;
vertical_text_ = vertical != 0;
return true;
}
// As DeSerialize, but only seeks past the data - hence a static method.
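// The reads below must mirror DeSerialize field-for-field so that the stream
// ends up positioned just past this ImageData without allocating anything.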
bool ImageData::SkipDeSerialize(bool swap, TFile* fp) {
if (!STRING::SkipDeSerialize(swap, fp)) return false;
inT32 page_number;
if (fp->FRead(&page_number, sizeof(page_number), 1) != 1) return false;
if (!GenericVector<char>::SkipDeSerialize(swap, fp)) return false;
if (!STRING::SkipDeSerialize(swap, fp)) return false;
if (!GenericVector<TBOX>::SkipDeSerialize(swap, fp)) return false;
if (!GenericVector<STRING>::SkipDeSerializeClasses(swap, fp)) return false;
inT8 vertical = 0;
return fp->FRead(&vertical, sizeof(vertical), 1) == 1;
}
// Saves the given Pix as a PNG-encoded string and destroys it.
void ImageData::SetPix(Pix* pix) {
SetPixInternal(pix, &image_data_);
}
// Returns the Pix image for *this. Must be pixDestroyed after use.
Pix* ImageData::GetPix() const {
return GetPixInternal(image_data_);
}
// Gets anything and everything with a non-NULL pointer, prescaled to a
// given target_height (if 0, then the original image height, capped at
// max_height), and aligned.
// Also returns (if not NULL) the width and height of the scaled image.
// The return value is the scaled Pix, which must be pixDestroyed after use,
// and scale_factor (if not NULL) is set to the scale factor that was applied
// to the image to achieve the target_height.
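// For example, a 2000x1000 source with target_height 500 gives im_factor 0.5,
// a scaled image of roughly 1000x500, and every box scaled by the same 0.5.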
Pix* ImageData::PreScale(int target_height, int max_height, float* scale_factor,
int* scaled_width, int* scaled_height,
GenericVector<TBOX>* boxes) const {
int input_width = 0;
int input_height = 0;
Pix* src_pix = GetPix();
ASSERT_HOST(src_pix != NULL);
input_width = pixGetWidth(src_pix);
input_height = pixGetHeight(src_pix);
if (target_height == 0) {
target_height = MIN(input_height, max_height);
}
float im_factor = static_cast<float>(target_height) / input_height;
if (scaled_width != NULL)
*scaled_width = IntCastRounded(im_factor * input_width);
if (scaled_height != NULL)
*scaled_height = target_height;
// Get the scaled image.
Pix* pix = pixScale(src_pix, im_factor, im_factor);
if (pix == NULL) {
tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n",
input_width, input_height, im_factor);
}
if (scaled_width != NULL) *scaled_width = pixGetWidth(pix);
if (scaled_height != NULL) *scaled_height = pixGetHeight(pix);
pixDestroy(&src_pix);
if (boxes != NULL) {
// Get the boxes.
boxes->truncate(0);
for (int b = 0; b < boxes_.size(); ++b) {
TBOX box = boxes_[b];
box.scale(im_factor);
boxes->push_back(box);
}
if (boxes->empty()) {
// Make a single box for the whole image.
TBOX box(0, 0, im_factor * input_width, target_height);
boxes->push_back(box);
}
}
if (scale_factor != NULL) *scale_factor = im_factor;
return pix;
}
int ImageData::MemoryUsed() const {
return image_data_.size();
}
// Draws the data in a new window.
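// Boxes, if any, are plotted in red with their box_texts_ alongside; with no
// boxes, the full transcription is written in cyan instead.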
void ImageData::Display() const {
#ifndef GRAPHICS_DISABLED
const int kTextSize = 64;
// Draw the image.
Pix* pix = GetPix();
if (pix == NULL) return;
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
ScrollView* win = new ScrollView("Imagedata", 100, 100,
2 * (width + 2 * kTextSize),
2 * (height + 4 * kTextSize),
width + 10, height + 3 * kTextSize, true);
win->Image(pix, 0, height - 1);
pixDestroy(&pix);
// Draw the boxes.
win->Pen(ScrollView::RED);
win->Brush(ScrollView::NONE);
int text_size = kTextSize;
if (!boxes_.empty() && boxes_[0].height() * 2 < text_size)
text_size = boxes_[0].height() * 2;
win->TextAttributes("Arial", text_size, false, false, false);
if (!boxes_.empty()) {
for (int b = 0; b < boxes_.size(); ++b) {
boxes_[b].plot(win);
win->Text(boxes_[b].left(), height + kTextSize, box_texts_[b].string());
}
} else {
// The full transcription.
win->Pen(ScrollView::CYAN);
win->Text(0, height + kTextSize * 2, transcription_.string());
}
win->Update();
window_wait(win);
#endif  // GRAPHICS_DISABLED
}
// Adds the supplied boxes and transcriptions that correspond to the correct
// page number.
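// A negative page_number_ accepts boxes from every page; otherwise only
// entries whose box_pages value matches page_number_ are kept.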
void ImageData::AddBoxes(const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
const GenericVector<int>& box_pages) {
// Copy the boxes and make the transcription.
for (int i = 0; i < box_pages.size(); ++i) {
if (page_number_ >= 0 && box_pages[i] != page_number_) continue;
transcription_ += texts[i];
boxes_.push_back(boxes[i]);
box_texts_.push_back(texts[i]);
}
}
// Saves the given Pix as a PNG-encoded string and destroys it.
void ImageData::SetPixInternal(Pix* pix, GenericVector<char>* image_data) {
l_uint8* data;
size_t size;
pixWriteMem(&data, &size, pix, IFF_PNG);
pixDestroy(&pix);
image_data->resize_no_init(size);
memcpy(&(*image_data)[0], data, size);
free(data);
}
// Returns the Pix image for the image_data. Must be pixDestroyed after use.
Pix* ImageData::GetPixInternal(const GenericVector<char>& image_data) {
Pix* pix = NULL;
if (!image_data.empty()) {
// Convert the array to an image.
const unsigned char* u_data =
reinterpret_cast<const unsigned char*>(&image_data[0]);
pix = pixReadMem(u_data, image_data.size());
}
return pix;
}
// Parses the text string as a box file and adds any discovered boxes that
// match the page number. Returns false on error.
bool ImageData::AddBoxes(const char* box_text) {
if (box_text != NULL && box_text[0] != '\0') {
GenericVector<TBOX> boxes;
GenericVector<STRING> texts;
GenericVector<int> box_pages;
if (ReadMemBoxes(page_number_, false, box_text, &boxes,
&texts, NULL, &box_pages)) {
AddBoxes(boxes, texts, box_pages);
return true;
} else {
tprintf("Error: No boxes for page %d from image %s!\n",
page_number_, imagefilename_.string());
}
}
return false;
}
// Thread function to call ReCachePages.
void* ReCachePagesFunc(void* data) {
DocumentData* document_data = reinterpret_cast<DocumentData*>(data);
document_data->ReCachePages();
return NULL;
}
DocumentData::DocumentData(const STRING& name)
: document_name_(name),
pages_offset_(-1),
total_pages_(-1),
memory_used_(0),
max_memory_(0),
reader_(NULL) {}
DocumentData::~DocumentData() {
SVAutoLock lock_p(&pages_mutex_);
SVAutoLock lock_g(&general_mutex_);
}
// Reads all the pages in the given lstmf filename to the cache. The reader
// is used to read the file.
bool DocumentData::LoadDocument(const char* filename, const char* lang,
int start_page, inT64 max_memory,
FileReader reader) {
SetDocument(filename, lang, max_memory, reader);
pages_offset_ = start_page;
return ReCachePages();
}
// Sets up the document, without actually loading it.
void DocumentData::SetDocument(const char* filename, const char* lang,
inT64 max_memory, FileReader reader) {
SVAutoLock lock_p(&pages_mutex_);
SVAutoLock lock(&general_mutex_);
document_name_ = filename;
lang_ = lang;
pages_offset_ = -1;
max_memory_ = max_memory;
reader_ = reader;
}
// Writes all the pages to the given filename. Returns false on error.
bool DocumentData::SaveDocument(const char* filename, FileWriter writer) {
SVAutoLock lock(&pages_mutex_);
TFile fp;
fp.OpenWrite(NULL);
if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) {
tprintf("Serialize failed: %s\n", filename);
return false;
}
return true;
}
bool DocumentData::SaveToBuffer(GenericVector<char>* buffer) {
SVAutoLock lock(&pages_mutex_);
TFile fp;
fp.OpenWrite(buffer);
return pages_.Serialize(&fp);
}
// Adds the given page data to this document, counting up memory.
void DocumentData::AddPageToDocument(ImageData* page) {
SVAutoLock lock(&pages_mutex_);
pages_.push_back(page);
set_memory_used(memory_used() + page->MemoryUsed());
}
// If the given index is not currently loaded, loads it using a separate
// thread.
void DocumentData::LoadPageInBackground(int index) {
ImageData* page = NULL;
if (IsPageAvailable(index, &page)) return;
SVAutoLock lock(&pages_mutex_);
if (pages_offset_ == index) return;
pages_offset_ = index;
pages_.clear();
#ifndef GRAPHICS_DISABLED
SVSync::StartThread(ReCachePagesFunc, this);
#endif // GRAPHICS_DISABLED
}
// Returns a pointer to the page with the given index, modulo the total
// number of pages. Blocks until the background load is completed.
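// This polls: if the wanted index has no load scheduled it schedules one,
// then sleeps roughly a second before checking availability again.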
const ImageData* DocumentData::GetPage(int index) {
ImageData* page = NULL;
while (!IsPageAvailable(index, &page)) {
// If there is no background load scheduled, schedule one now.
pages_mutex_.Lock();
bool needs_loading = pages_offset_ != index;
pages_mutex_.Unlock();
if (needs_loading) LoadPageInBackground(index);
// We can't directly load the page, or the background load will delete it
// while the caller is using it, so give it a chance to work.
#if __cplusplus > 199711L && !defined(__MINGW32__)
std::this_thread::sleep_for(std::chrono::seconds(1));
#elif _WIN32 // MSVS
Sleep(1000);
#else
sleep(1);
#endif
}
return page;
}
// Returns true if the requested page is available, and provides a pointer,
// which may be NULL if the document is empty. May block, even though it
// doesn't guarantee to return true.
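// It can block because ReCachePages holds pages_mutex_ for the whole of a
// background load, so the lock below waits until that load finishes.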
bool DocumentData::IsPageAvailable(int index, ImageData** page) {
SVAutoLock lock(&pages_mutex_);
int num_pages = NumPages();
if (num_pages == 0 || index < 0) {
*page = NULL; // Empty Document.
return true;
}
if (num_pages > 0) {
index = Modulo(index, num_pages);
if (pages_offset_ <= index && index < pages_offset_ + pages_.size()) {
*page = pages_[index - pages_offset_]; // Page is available already.
return true;
}
}
return false;
}
// Removes all pages from memory and frees the memory, but does not forget
// the document metadata.
inT64 DocumentData::UnCache() {
SVAutoLock lock(&pages_mutex_);
inT64 memory_saved = memory_used();
pages_.clear();
pages_offset_ = -1;
set_total_pages(-1);
set_memory_used(0);
tprintf("Unloaded document %s, freeing %d bytes\n", document_name_.string(),
static_cast<int>(memory_saved));
return memory_saved;
}
// Locks the pages_mutex_ and loads as many pages as can fit in max_memory_
// starting at index pages_offset_.
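// On any read failure pages_ is truncated and false is returned; on success
// total_pages_ records the page count of the whole file, even though only
// the window that fits in max_memory_ is kept in pages_.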
bool DocumentData::ReCachePages() {
SVAutoLock lock(&pages_mutex_);
// Read the file.
set_total_pages(0);
set_memory_used(0);
int loaded_pages = 0;
pages_.truncate(0);
TFile fp;
if (!fp.Open(document_name_, reader_) ||
!PointerVector<ImageData>::DeSerializeSize(false, &fp, &loaded_pages) ||
loaded_pages <= 0) {
tprintf("Deserialize header failed: %s\n", document_name_.string());
return false;
}
pages_offset_ %= loaded_pages;
// Skip pages before the first one we want, and load the rest until max
// memory and skip the rest after that.
int page;
for (page = 0; page < loaded_pages; ++page) {
if (page < pages_offset_ ||
(max_memory_ > 0 && memory_used() > max_memory_)) {
if (!PointerVector<ImageData>::DeSerializeSkip(false, &fp)) break;
} else {
if (!pages_.DeSerializeElement(false, &fp)) break;
ImageData* image_data = pages_.back();
if (image_data->imagefilename().length() == 0) {
image_data->set_imagefilename(document_name_);
image_data->set_page_number(page);
}
image_data->set_language(lang_);
set_memory_used(memory_used() + image_data->MemoryUsed());
}
}
if (page < loaded_pages) {
tprintf("Deserialize failed: %s read %d/%d pages\n",
document_name_.string(), page, loaded_pages);
pages_.truncate(0);
} else {
tprintf("Loaded %d/%d pages (%d-%d) of document %s\n", pages_.size(),
loaded_pages, pages_offset_, pages_offset_ + pages_.size(),
document_name_.string());
}
set_total_pages(loaded_pages);
return !pages_.empty();
}
// A collection of DocumentData that knows roughly how much memory it is using.
DocumentCache::DocumentCache(inT64 max_memory)
: num_pages_per_doc_(0), max_memory_(max_memory) {}
DocumentCache::~DocumentCache() {}
// Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files.
bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames,
const char* lang,
CachingStrategy cache_strategy,
FileReader reader) {
cache_strategy_ = cache_strategy;
inT64 fair_share_memory = 0;
// In the round-robin case, each DocumentData handles restricting its content
// to its fair share of memory. In the sequential case, DocumentCache
// determines which DocumentDatas are held entirely in memory.
if (cache_strategy_ == CS_ROUND_ROBIN)
fair_share_memory = max_memory_ / filenames.size();
for (int arg = 0; arg < filenames.size(); ++arg) {
STRING filename = filenames[arg];
DocumentData* document = new DocumentData(filename);
document->SetDocument(filename.string(), lang, fair_share_memory, reader);
AddToCache(document);
}
if (!documents_.empty()) {
// Try to get the first page now to verify the list of filenames.
if (GetPageBySerial(0) != NULL) return true;
tprintf("Load of page 0 failed!\n");
}
return false;
}
// Adds document to the cache.
bool DocumentCache::AddToCache(DocumentData* data) {
// Memory use is tallied on demand (see GetPageSequential), so there is
// nothing to record here.
documents_.push_back(data);
return true;
}
// Finds and returns a document by name.
DocumentData* DocumentCache::FindDocument(const STRING& document_name) const {
for (int i = 0; i < documents_.size(); ++i) {
if (documents_[i]->document_name() == document_name)
return documents_[i];
}
return NULL;
}
// Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
// strategy, could take a long time.
int DocumentCache::TotalPages() {
if (cache_strategy_ == CS_SEQUENTIAL) {
// In sequential mode, we assume each doc has the same number of pages
// whether it is true or not.
if (num_pages_per_doc_ == 0) GetPageSequential(0);
return num_pages_per_doc_ * documents_.size();
}
int total_pages = 0;
int num_docs = documents_.size();
for (int d = 0; d < num_docs; ++d) {
// We have to load a page to make NumPages() valid.
documents_[d]->GetPage(0);
total_pages += documents_[d]->NumPages();
}
return total_pages;
}
// Returns a page by serial number, selecting them in a round-robin fashion
// from all the documents. Highly disk-intensive, but doesn't need samples
// to be shuffled between files to begin with.
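// E.g. with 3 documents, serial 7 maps to documents_[1], page 2 (7 % 3 and
// 7 / 3); the pages for the next few serials are then prefetched in
// background, one per following document.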
const ImageData* DocumentCache::GetPageRoundRobin(int serial) {
int num_docs = documents_.size();
int doc_index = serial % num_docs;
const ImageData* doc = documents_[doc_index]->GetPage(serial / num_docs);
for (int offset = 1; offset <= kMaxReadAhead && offset < num_docs; ++offset) {
doc_index = (serial + offset) % num_docs;
int page = (serial + offset) / num_docs;
documents_[doc_index]->LoadPageInBackground(page);
}
return doc;
}
// Returns a page by serial number, selecting them in sequence from each file.
// Requires the samples to be shuffled between the files to give a random or
// uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
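// E.g. with 4 documents and num_pages_per_doc_ = 100, serial 250 maps to
// documents_[(250 / 100) % 4] = documents_[2], page 250 % 100 = 50.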
const ImageData* DocumentCache::GetPageSequential(int serial) {
int num_docs = documents_.size();
ASSERT_HOST(num_docs > 0);
if (num_pages_per_doc_ == 0) {
// Use the pages in the first doc as the number of pages in each doc.
documents_[0]->GetPage(0);
num_pages_per_doc_ = documents_[0]->NumPages();
if (num_pages_per_doc_ == 0) {
tprintf("First document cannot be empty!!\n");
ASSERT_HOST(num_pages_per_doc_ > 0);
}
// Get rid of zero now if we don't need it.
if (serial / num_pages_per_doc_ % num_docs > 0) documents_[0]->UnCache();
}
int doc_index = serial / num_pages_per_doc_ % num_docs;
const ImageData* doc =
documents_[doc_index]->GetPage(serial % num_pages_per_doc_);
// Count up total memory. Background loading makes it more complicated to
// keep a running count.
inT64 total_memory = 0;
for (int d = 0; d < num_docs; ++d) {
total_memory += documents_[d]->memory_used();
}
if (total_memory >= max_memory_) {
// Find something to un-cache.
// If there are more than 3 in front, then serial is from the back reader
// of a pair of readers. If we un-cache from in-front-2 to 2-ahead, then
// we create a hole between them and then un-caching the backmost occupied
// will work for both.
int num_in_front = CountNeighbourDocs(doc_index, 1);
for (int offset = num_in_front - 2;
offset > 1 && total_memory >= max_memory_; --offset) {
int next_index = (doc_index + offset) % num_docs;
total_memory -= documents_[next_index]->UnCache();
}
// If that didn't work, the best solution is to un-cache from the back. If
// we take away the document that a 2nd reader is using, it will put it
// back and make a hole between.
int num_behind = CountNeighbourDocs(doc_index, -1);
for (int offset = num_behind; offset < 0 && total_memory >= max_memory_;
++offset) {
int next_index = (doc_index + offset + num_docs) % num_docs;
total_memory -= documents_[next_index]->UnCache();
}
}
int next_index = (doc_index + 1) % num_docs;
if (!documents_[next_index]->IsCached() && total_memory < max_memory_) {
documents_[next_index]->LoadPageInBackground(0);
}
return doc;
}
// Helper that counts the number of adjacent cached neighbours of index,
// looking in direction dir, i.e. index+dir, index+2*dir etc.
int DocumentCache::CountNeighbourDocs(int index, int dir) {
int num_docs = documents_.size();
for (int offset = dir; abs(offset) < num_docs; offset += dir) {
int offset_index = (index + offset + num_docs) % num_docs;
if (!documents_[offset_index]->IsCached()) return offset - dir;
}
return num_docs;
}
} // namespace tesseract.

View File

@ -0,0 +1,379 @@
///////////////////////////////////////////////////////////////////////
// File: imagedata.h
// Description: Class to hold information about a single image and its
// corresponding boxes or text file.
// Author: Ray Smith
// Created: Mon Jul 22 14:17:06 PDT 2013
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_IMAGE_IMAGEDATA_H_
#define TESSERACT_IMAGE_IMAGEDATA_H_
#include "genericvector.h"
#include "normalis.h"
#include "rect.h"
#include "strngs.h"
#include "svutil.h"
struct Pix;
namespace tesseract {
// Amount of padding to apply in output pixels in feature mode.
const int kFeaturePadding = 2;
// Number of pixels to pad around text boxes.
const int kImagePadding = 4;
// Enum to determine the caching and data sequencing strategy.
enum CachingStrategy {
// Reads all of one file before moving on to the next. Requires samples to be
// shuffled across files. Uses the count of samples in the first file as
// the count in all the files to achieve high-speed random access. As a
// consequence, if subsequent files are smaller, they get entries used more
// than once, and if subsequent files are larger, some entries are not used.
// Best for larger data sets that don't fit in memory.
CS_SEQUENTIAL,
// Reads one sample from each file in rotation. Does not require shuffled
// samples, but is extremely disk-intensive. Samples in smaller files also
// get used more often than samples in larger files.
// Best for smaller data sets that mostly fit in memory.
CS_ROUND_ROBIN,
};
class WordFeature {
public:
WordFeature();
WordFeature(const FCOORD& fcoord, uinT8 dir);
// Computes the maximum x and y value in the features.
static void ComputeSize(const GenericVector<WordFeature>& features,
int* max_x, int* max_y);
// Draws the features in the given window.
static void Draw(const GenericVector<WordFeature>& features,
ScrollView* window);
// Accessors.
int x() const { return x_; }
int y() const { return y_; }
int dir() const { return dir_; }
// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE* fp);
private:
inT16 x_;
uinT8 y_;
uinT8 dir_;
};
// A floating-point version of WordFeature, used as an intermediate during
// scaling.
struct FloatWordFeature {
static void FromWordFeatures(const GenericVector<WordFeature>& word_features,
GenericVector<FloatWordFeature>* float_features);
// Sort function to sort first by x-bucket, then by y.
static int SortByXBucket(const void*, const void*);
float x;
float y;
float dir;
int x_bucket;
};
// Class to hold information on a single image:
// Filename, cached image as a Pix*, character boxes, text transcription.
// The text transcription is the ground truth UTF-8 text for the image.
// Character boxes are optional and indicate the desired segmentation of
// the text into recognition units.
class ImageData {
public:
ImageData();
// Takes ownership of the pix.
ImageData(bool vertical, Pix* pix);
~ImageData();
// Builds and returns an ImageData from the basic data. Note that imagedata,
// truth_text, and box_text are all the actual file data, NOT filenames.
static ImageData* Build(const char* name, int page_number, const char* lang,
const char* imagedata, int imagedatasize,
const char* truth_text, const char* box_text);
// Writes to the given file. Returns false in case of error.
bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, TFile* fp);
// As DeSerialize, but only seeks past the data - hence a static method.
static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
// Other accessors.
const STRING& imagefilename() const {
return imagefilename_;
}
void set_imagefilename(const STRING& name) {
imagefilename_ = name;
}
int page_number() const {
return page_number_;
}
void set_page_number(int num) {
page_number_ = num;
}
const GenericVector<char>& image_data() const {
return image_data_;
}
const STRING& language() const {
return language_;
}
void set_language(const STRING& lang) {
language_ = lang;
}
const STRING& transcription() const {
return transcription_;
}
const GenericVector<TBOX>& boxes() const {
return boxes_;
}
const GenericVector<STRING>& box_texts() const {
return box_texts_;
}
const STRING& box_text(int index) const {
return box_texts_[index];
}
// Saves the given Pix as a PNG-encoded string and destroys it.
void SetPix(Pix* pix);
// Returns the Pix image for *this. Must be pixDestroyed after use.
Pix* GetPix() const;
// Gets anything and everything with a non-NULL pointer, prescaled to a
// given target_height (if 0, then the original image height, capped at
// max_height), and aligned.
// Also returns (if not NULL) the width and height of the scaled image.
// The return value is the scaled Pix, which must be pixDestroyed after use,
// and scale_factor (if not NULL) is set to the scale factor that was applied
// to the image to achieve the target_height.
Pix* PreScale(int target_height, int max_height, float* scale_factor,
int* scaled_width, int* scaled_height,
GenericVector<TBOX>* boxes) const;
int MemoryUsed() const;
// Draws the data in a new window.
void Display() const;
// Adds the supplied boxes and transcriptions that correspond to the correct
// page number.
void AddBoxes(const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
const GenericVector<int>& box_pages);
private:
// Saves the given Pix as a PNG-encoded string and destroys it.
static void SetPixInternal(Pix* pix, GenericVector<char>* image_data);
// Returns the Pix image for the image_data. Must be pixDestroyed after use.
static Pix* GetPixInternal(const GenericVector<char>& image_data);
// Parses the text string as a box file and adds any discovered boxes that
// match the page number. Returns false on error.
bool AddBoxes(const char* box_text);
private:
STRING imagefilename_; // File to read image from.
inT32 page_number_; // Page number if multi-page tif or -1.
GenericVector<char> image_data_; // PNG file data.
STRING language_; // Language code for image.
STRING transcription_; // UTF-8 ground truth of image.
GenericVector<TBOX> boxes_; // If non-empty boxes of the image.
GenericVector<STRING> box_texts_; // String for text in each box.
bool vertical_text_; // Image has been rotated from vertical.
};
// A collection of ImageData that knows roughly how much memory it is using.
class DocumentData {
friend void* ReCachePagesFunc(void* data);
public:
explicit DocumentData(const STRING& name);
~DocumentData();
// Reads all the pages in the given lstmf filename to the cache. The reader
// is used to read the file.
bool LoadDocument(const char* filename, const char* lang, int start_page,
inT64 max_memory, FileReader reader);
// Sets up the document, without actually loading it.
void SetDocument(const char* filename, const char* lang, inT64 max_memory,
FileReader reader);
// Writes all the pages to the given filename. Returns false on error.
bool SaveDocument(const char* filename, FileWriter writer);
bool SaveToBuffer(GenericVector<char>* buffer);
// Adds the given page data to this document, counting up memory.
void AddPageToDocument(ImageData* page);
const STRING& document_name() const {
SVAutoLock lock(&general_mutex_);
return document_name_;
}
int NumPages() const {
SVAutoLock lock(&general_mutex_);
return total_pages_;
}
inT64 memory_used() const {
SVAutoLock lock(&general_mutex_);
return memory_used_;
}
// If the given index is not currently loaded, loads it using a separate
// thread. Note: there are 4 cases:
// Document uncached: IsCached() returns false, total_pages_ < 0.
// Required page is available: IsPageAvailable returns true. In this case,
// total_pages_ > 0 and
// pages_offset_ <= index%total_pages_ <= pages_offset_+pages_.size()
// Pages are loaded, but the required one is not.
// The requested page is being loaded by LoadPageInBackground. In this case,
// index == pages_offset_. Once the loading starts, the pages lock is held
// until it completes, at which point IsPageAvailable will unblock and return
// true.
void LoadPageInBackground(int index);
// Returns a pointer to the page with the given index, modulo the total
// number of pages. Blocks until the background load is completed.
const ImageData* GetPage(int index);
// Returns true if the requested page is available, and provides a pointer,
// which may be NULL if the document is empty. May block, even though it
// doesn't guarantee to return true.
bool IsPageAvailable(int index, ImageData** page);
// Takes ownership of the page with the given index; the entry is set to NULL in *this.
ImageData* TakePage(int index) {
SVAutoLock lock(&pages_mutex_);
ImageData* page = pages_[index];
pages_[index] = NULL;
return page;
}
// Returns true if the document is currently loaded or in the process of
// loading.
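// total_pages_ is -1 for an uncached document (constructor and UnCache) and
// is set to 0 at the start of ReCachePages, so NumPages() >= 0 also covers a
// load that is still in progress.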
bool IsCached() const { return NumPages() >= 0; }
// Removes all pages from memory and frees the memory, but does not forget
// the document metadata. Returns the memory saved.
inT64 UnCache();
private:
// Sets the value of total_pages_ behind a mutex.
void set_total_pages(int total) {
SVAutoLock lock(&general_mutex_);
total_pages_ = total;
}
void set_memory_used(inT64 memory_used) {
SVAutoLock lock(&general_mutex_);
memory_used_ = memory_used;
}
// Locks the pages_mutex_ and loads as many pages as can fit in max_memory_
// starting at index pages_offset_.
bool ReCachePages();
private:
// A name for this document.
STRING document_name_;
// The language of this document.
STRING lang_;
// A group of pages that corresponds in some loose way to a document.
PointerVector<ImageData> pages_;
// Page number of the first index in pages_.
int pages_offset_;
// Total number of pages in document (may exceed size of pages_.)
int total_pages_;
// Total of all pix sizes in the document.
inT64 memory_used_;
// Max memory to use at any time.
inT64 max_memory_;
// Saved reader from LoadDocument to allow re-caching.
FileReader reader_;
// Mutex that protects pages_ and pages_offset_ against multiple parallel
// loads, and provides a wait for page.
SVMutex pages_mutex_;
// Mutex that protects other data members that callers want to access without
// waiting for a load operation.
mutable SVMutex general_mutex_;
};
// A collection of DocumentData that knows roughly how much memory it is using.
// Note that while it supports background read-ahead, it assumes that a single
// thread is accessing documents, ie it is not safe for multiple threads to
// access different documents in parallel, as one may de-cache the other's
// content.
class DocumentCache {
public:
explicit DocumentCache(inT64 max_memory);
~DocumentCache();
// Deletes all existing documents from the cache.
void Clear() {
documents_.clear();
num_pages_per_doc_ = 0;
}
// Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files.
bool LoadDocuments(const GenericVector<STRING>& filenames, const char* lang,
CachingStrategy cache_strategy, FileReader reader);
// Adds document to the cache.
bool AddToCache(DocumentData* data);
// Finds and returns a document by name.
DocumentData* FindDocument(const STRING& document_name) const;
// Returns a page by serial number using the current cache_strategy_ to
// determine the mapping from serial number to page.
const ImageData* GetPageBySerial(int serial) {
if (cache_strategy_ == CS_SEQUENTIAL)
return GetPageSequential(serial);
else
return GetPageRoundRobin(serial);
}
const PointerVector<DocumentData>& documents() const {
return documents_;
}
// Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
// strategy, could take a long time.
int TotalPages();
private:
// Returns a page by serial number, selecting them in a round-robin fashion
// from all the documents. Highly disk-intensive, but doesn't need samples
// to be shuffled between files to begin with.
const ImageData* GetPageRoundRobin(int serial);
// Returns a page by serial number, selecting them in sequence from each file.
// Requires the samples to be shuffled between the files to give a random or
// uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
const ImageData* GetPageSequential(int serial);
// Helper that counts the number of adjacent cached neighbour documents_ of
// index, looking in direction dir, i.e. index+dir, index+2*dir etc.
int CountNeighbourDocs(int index, int dir);
// A group of pages that corresponds in some loose way to a document.
PointerVector<DocumentData> documents_;
// Strategy to use for caching and serializing data samples.
CachingStrategy cache_strategy_;
// Number of pages in the first document, used as a divisor in
// GetPageSequential to determine the document index.
int num_pages_per_doc_;
// Max memory allowed in this cache.
inT64 max_memory_;
};
} // namespace tesseract
#endif // TESSERACT_IMAGE_IMAGEDATA_H_

Some files were not shown because too many files have changed in this diff.