1.修复OCR 300DPI bug

2.优化跳过空白页算法
2020-08-15 16:06:44 +08:00 · 2020-08-15 16:06:44 +08:00 · ac12acc233
parent 19c001a85c
commit ac12acc233
5 changed files with 285 additions and 25 deletions
--- a/huagao/ImageProcess/ImageApplyDiscardBlank.cpp
+++ b/huagao/ImageProcess/ImageApplyDiscardBlank.cpp
@ -1,9 +1,8 @@
 #include "ImageApplyDiscardBlank.h"
 #include "ImageProcess_Public.h"
-
-CImageApplyDiscardBlank::CImageApplyDiscardBlank(bool isnormal)
+#include "filetools.h"
+CImageApplyDiscardBlank::CImageApplyDiscardBlank()
 	: m_res(false)
-	, isNormalDiscard(isnormal)
 	, dSize(200)
 	, devTh(15, 15, 15, 15)
 {
@ -88,15 +87,15 @@ int CImageApplyDiscardBlank::processRectR(const cv::Mat& image, cv::RotatedRect&

  bool CImageApplyDiscardBlank::scalar_LE(const cv::Scalar& val1, const cv::Scalar& val2)
 {
-	for(int i = 0; i < 3; i++)
-		if(val1[i] > val2[i])
+	for (int i = 0; i < 3; i++)
+		if (val1[i] > val2[i])
 			return false;
 	return true;
 }

 void CImageApplyDiscardBlank::setIntensity(int val)
 {
-	val = cv::max(cv::min(20, val), 2); 
+	val = cv::max(cv::min(100, val), 1); 
 	devTh = cv::Scalar(val, val, val, val);
 }

@ -117,7 +116,7 @@ cv::Mat CImageApplyDiscardBlank::getRoiMat(const cv::Mat& image)
 	return image(inRect);
 }

-void CImageApplyDiscardBlank::apply(cv::Mat& pDib,int side)
+void CImageApplyDiscardBlank::apply(cv::Mat& pDib, int side)
 {
 #ifdef LOG
 	FileTools::write_log("imgprc.txt", "enter CImageApplyDiscardBlank apply");
@ -131,9 +130,6 @@ void CImageApplyDiscardBlank::apply(cv::Mat& pDib,int side)
 		return;
 	}

-	setIntensity(isNormalDiscard ? 8 : 20);
-	setMinArea(isNormalDiscard ? 200 : 300);
-
 	cv::Scalar mean;  
 	cv::Scalar dev;  
 	cv::Mat image = getRoiMat(pDib);
@ -150,7 +146,7 @@ void CImageApplyDiscardBlank::apply(cv::Mat& pDib,int side)
 				{
 					m_res = false;
 #ifdef LOG
-					FileTools::write_log("imgprc.txt", "exit CImageApplyDiscardBlank apply");
+					FileTools::write_log("D:\\imgprc.txt", "CImageApplyDiscardBlank blank");
 #endif // LOG
 					return;
 				}
@ -166,14 +162,13 @@ void CImageApplyDiscardBlank::apply(cv::Mat& pDib,int side)

 void CImageApplyDiscardBlank::apply(std::vector<cv::Mat>& mats, bool isTwoSide)
 {
-	if (mats.empty()) return;
-
-	if (!mats[0].empty()) {
-		apply(mats[0], 0);
+    (void)isTwoSide;
+	int i = 0;
+	for (cv::Mat& var : mats) {
+		if (i != 0 && isTwoSide == false)
+			break;
+		if (!var.empty())
+			apply(var, 0);
+		i++;
 	}
-
-	if (isTwoSide && mats.size() > 1) {
-		if (!mats[1].empty())
-			apply(mats[1], 1);
-	}
-}
+}
--- a/huagao/ImageProcess/ImageApplyDiscardBlank.h
+++ b/huagao/ImageProcess/ImageApplyDiscardBlank.h
@ -1,3 +1,16 @@
+/*
+ * ====================================================
+
+ * 功能：空白页识别。
+ * 作者：刘丁维
+ * 生成时间：2020/4/21
+ * 最近修改时间：2020/4/21  v1.0
+				 2020/8/12  v1.1 开放setIntensity和setMinArea；取消isNormal标识位；扩大setIntensity的设置范围，从[2, 20]扩大到[1, 100]
+ * 版本号：v1.1
+
+ * ====================================================
+ */
+
 #ifndef IMAGE_APPLY_DISCARD_BLANK_H
 #define IMAGE_APPLY_DISCARD_BLANK_H

@ -6,19 +19,23 @@
 class CImageApplyDiscardBlank : public CImageApply
 {
 public:
-	CImageApplyDiscardBlank(bool isnormal = true);
+	/*
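+	 * Default constructor. The former isnormal mode flag (true: standard mode, false: receipt/carbon-paper mode) was removed in v1.1; adjust detection with setIntensity() and setMinArea() instead.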
+	 * */
+	CImageApplyDiscardBlank();

 	virtual ~CImageApplyDiscardBlank(void);

-	virtual void apply(cv::Mat& pDib,int  side); 
+	virtual void apply(cv::Mat& pDib,int side); 

 	virtual void apply(std::vector<cv::Mat>& mats, bool isTwoSide);

-private:
 	void setIntensity(int val);

 	void setMinArea(int val) { dSize = val; }

+private:
+
 	int processRectR(const cv::Mat& image, cv::RotatedRect& rotatedRect, std::vector<cv::Point>& maxContour, 
 		double scale, double thresh, int blobAreaSize);
 	
--- a/huagao/ImageProcess/ImageApplyRotation.cpp
+++ b/huagao/ImageProcess/ImageApplyRotation.cpp
@ -50,7 +50,7 @@ void CImageApplyRotation::apply(cv::Mat & pDib, int side)
 			if (m_dpi != 200)
 			{
 				double scale = 200 / static_cast<double>(m_dpi);
-				int new_w = (pDib.cols * scale + 3) / 4 * 4;
+				int new_w = static_cast<int>(pDib.cols * scale) / 4 * 4;
 				int new_h = pDib.rows * scale;
 				cv::resize(pDib, temp, cv::Size(new_w, new_h));
 			}
--- a/huagao/ImageProcess/hg_gpdf.cpp
+++ b/huagao/ImageProcess/hg_gpdf.cpp
@ -0,0 +1,163 @@
+#include "hg_gpdf.h"
+
+#if defined(WIN32)
+#include "baseapi.h"
+#include "allheaders.h"
+#include "renderer.h"
+#else
+#include <leptonica/allheaders.h>
+#include <tesseract/baseapi.h>
+#include <tesseract/renderer.h>
+#endif
+
+//#define USE_QT
+#ifdef USE_QT
+#include <QDebug>
+#include <QTime>
+#endif
+
+/*
+ * Copies a raw image buffer into a newly created Leptonica Pix.
+ *
+ * imagedata:        [in] first byte of the source pixels
+ * width, height:    [in] image size in pixels
+ * bytes_per_pixel:  [in] 0 is handled as 1 bit per pixel, 1 as 8-bit grey,
+ *                        3 as 24-bit colour, 4 as 32-bit; other values create
+ *                        a Pix of that depth but copy no pixels
+ * bytes_per_line:   [in] byte distance between the starts of consecutive source rows
+ * dpi:              [in] requested resolution (overridden below, see the final note)
+ *
+ * Returns the new Pix, or nullptr when imagedata is null or the Pix cannot be
+ * created. The caller releases the result with pixDestroy().
+ */
+Pix* createPix(const unsigned char * imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line, int dpi)
+{
+    // Without source pixels there is nothing to copy.
+    if (imagedata == nullptr) return nullptr;
+
+    int bpp = bytes_per_pixel * 8;
+    if (bpp == 0) bpp = 1;
+
+    // 24-bit input is stored in a 32-bit Pix; every other depth is passed through unchanged.
+    Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
+    // pixCreate signals failure with NULL; writing through its data pointer would crash.
+    if (pix == nullptr) return nullptr;
+
+    pixSetXRes(pix, dpi);
+    pixSetYRes(pix, dpi);
+    l_uint32* data = pixGetData(pix);
+    int wpl = pixGetWpl(pix);
+    switch (bpp)
+    {
+    case 1:
+        // A set source bit clears the destination bit and vice versa, so the
+        // 1-bit image is inverted while it is copied.
+        for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line)
+            for (int x = 0; x < width; ++x)
+                if (imagedata[x / 8] & (0x80 >> (x % 8)))
+                    CLEAR_DATA_BIT(data, x);
+                else
+                    SET_DATA_BIT(data, x);
+        break;
+
+    case 8:
+        // Greyscale just copies the bytes in the right order.
+        for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line)
+            for (int x = 0; x < width; ++x)
+                SET_DATA_BYTE(data, x, imagedata[x]);
+        break;
+
+    case 24:
+        // Put the colors in the correct places in the line buffer.
+        // data advances one 32-bit word per pixel and is not re-based per row,
+        // which relies on a 32 bpp row being exactly `width` words long.
+        for (int y = 0; y < height; ++y, imagedata += bytes_per_line)
+            for (int x = 0; x < width; ++x, ++data) {
+                SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
+                SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
+                SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
+            }
+        break;
+
+    case 32:
+        // Maintain byte order consistency across different endianness:
+        // source byte 0 becomes the most significant byte of the word.
+        for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl)
+            for (int x = 0; x < width; ++x)
+                data[x] = static_cast<l_uint32>((imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
+                (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3]);
+        break;
+
+    default:
+        // Unsupported depth: the Pix is returned without any pixel data copied.
+        break;
+    }
+
+    // NOTE(review): 1 and 2 are raw Leptonica input-format codes, presumably
+    // IFF_BMP and IFF_JFIF_JPEG; confirm against the Leptonica headers in use.
+    pix->informat = bytes_per_pixel == 1 ? 1 : 2;
+    if (bytes_per_pixel == 1)
+    {
+        // Replace the default colormap storage with a 256-entry table in which
+        // all four bytes of entry i equal i.
+        PIXCMAP* colormap = pixcmapCreate(8);
+        LEPT_FREE(colormap->array);
+        colormap->array = reinterpret_cast<l_uint8*>(LEPT_CALLOC(256, sizeof(RGBA_QUAD)));
+        colormap->n = 256;
+        colormap->nalloc = 256;
+        colormap->depth = 8;
+        l_uint8* ptr = reinterpret_cast<l_uint8*>(colormap->array);
+        for (int i = 0; i < 256; i++)
+            ptr[i * 4 + 0] = ptr[i * 4 + 1] = ptr[i * 4 + 2] = ptr[i * 4 + 3] = static_cast<l_uint8>(i);
+        pixSetColormap(pix, colormap);
+    }
+
+    // NOTE(review): the resolution is forced to 200 here, overriding the dpi
+    // value set above; presumably callers rescale the image to 200 DPI before
+    // calling. Confirm before changing it.
+    pixSetXRes(pix, 200);
+    pixSetYRes(pix, 200);
+    return pix;
+}
+
+// Starts with no engine attached: m_ptr stays null until init() creates one,
+// and ocr() returns false while it is null.
+HG_OCR::HG_OCR() : m_ptr(nullptr) {}
+
+// Shuts down and frees the Tesseract engine, if init() ever created one.
+HG_OCR::~HG_OCR()
+{
+    tesseract::TessBaseAPI* engine = reinterpret_cast<tesseract::TessBaseAPI*>(m_ptr);
+    if (engine == nullptr)
+        return;
+
+    engine->End();
+    delete engine;
+}
+
+/*
+ * Creates a fresh Tesseract engine, replacing any engine from an earlier call.
+ *
+ * trainFile: [in] data path handed to TessBaseAPI::Init
+ * mode:      [in] RECOGNITION_OCR loads the "chi_sim" language, any other value loads "osd"
+ *
+ * Returns the result of TessBaseAPI::Init (the header documents 0 as success).
+ */
+int HG_OCR::init(const char* trainFile, RECOGNITION_MODE mode)
+{
+    // Release the previous engine and clear the handle immediately, so that it
+    // never dangles (and is never deleted twice by the destructor) if the
+    // allocation below throws.
+    delete reinterpret_cast<tesseract::TessBaseAPI*>(m_ptr);
+    m_ptr = nullptr;
+
+    tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI();
+    api->SetPageSegMode(tesseract::PSM_AUTO_OSD);
+    m_ptr = reinterpret_cast<void*>(api);
+
+    // NOTE(review): the engine stays attached even when Init reports failure,
+    // as before; callers must check the return value before using the object.
+    const int status = api->Init(trainFile, mode == RECOGNITION_OCR ? "chi_sim" : "osd");
+    return status;
+}
+
+/*
+ * Recognizes a list of image files and writes the combined result through
+ * the renderers selected by flag.
+ *
+ * inputFileNames: [in] array of image file paths
+ * numOfFiles:     [in] number of entries in inputFileNames
+ * outputFileName: [in] output base name handed to the renderers
+ * flag:           [in] SAVE_PDF and/or SAVE_TXT bits
+ *
+ * Returns false when the engine is missing, an argument is invalid, an image
+ * cannot be read or recognition of a page fails; true otherwise. Pages that
+ * fail are skipped, the remaining pages are still written.
+ */
+bool HG_OCR::ocr(const char** inputFileNames, int numOfFiles, const char* outputFileName, SAVE_MODE flag)
+{
+    if (m_ptr == nullptr) return false;
+    // A null output name would be dereferenced by the renderers; a negative
+    // count is as useless as zero.
+    if (inputFileNames == nullptr || numOfFiles <= 0 || outputFileName == nullptr) return false;
+
+    tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
+    tesseract::TessBaseAPI *api = reinterpret_cast<tesseract::TessBaseAPI*>(m_ptr);
+    if (flag & SAVE_PDF)
+        renderers.push_back(new tesseract::TessPDFRenderer(outputFileName, api->GetDatapath(), false));
+    if (flag & SAVE_TXT)
+        renderers.push_back(new tesseract::TessTextRenderer(outputFileName));
+
+#ifdef USE_QT
+    QTime timer;
+    timer.start();
+#endif
+
+    const int numOfRenderer = renderers.length();
+    for (int r = 0; r < numOfRenderer; r++)
+        renderers[r]->BeginDocument("");
+
+    bool allPagesOk = true;
+    for (int fileIdx = 0; fileIdx < numOfFiles; fileIdx++)
+    {
+        const char* inputName = inputFileNames[fileIdx];
+        Pix* img = (inputName != nullptr) ? pixRead(inputName) : nullptr;
+        if (img == nullptr)
+        {
+            // Unreadable or missing file: report it through the return value
+            // instead of handing a null image to the engine.
+            allPagesOk = false;
+            continue;
+        }
+
+        // NOTE(review): the input name is set to the OUTPUT base name, as in
+        // the original code; confirm whether the input path was intended.
+        api->SetInputName(outputFileName);
+        api->SetImage(img);
+        if (api->Recognize(nullptr) < 0)
+        {
+            // Nothing meaningful to render for this page.
+            allPagesOk = false;
+        }
+        else
+        {
+            for (int r = 0; r < numOfRenderer; r++)
+                renderers[r]->AddImage(api);
+        }
+        pixDestroy(&img);
+    }
+
+    for (int r = 0; r < numOfRenderer; r++)
+        renderers[r]->EndDocument();
+
+    renderers.clear();
+
+#ifdef USE_QT
+    qDebug() << timer.elapsed();
+#endif
+
+    return allPagesOk;
+}
+
+/*
+ * Detects the orientation of the text in a raw image buffer.
+ *
+ * data:          [in] image pixels; rows are assumed to be padded to a multiple of 4 bytes
+ * width, height: [in] image size in pixels
+ * channels:      [in] bytes per pixel, forwarded to createPix
+ * dpi:           [in] image resolution, forwarded to createPix
+ *
+ * Returns the angle written by DetectOrientationScript, or 1 when the engine
+ * is missing, the buffer is null, the Pix cannot be built or the detection
+ * call does not overwrite the initial value.
+ */
+int HG_OCR::orientation(const unsigned char *data, int width, int height, int channels, int dpi)
+{
+    // Initial value kept from the original code; it is not one of the
+    // documented 0/90/180/270 results, so it doubles as the "no result" value.
+    int orient_deg = 1;
+
+    // Both the engine handle and the buffer used to be dereferenced unchecked.
+    if (m_ptr == nullptr || data == nullptr) return orient_deg;
+
+    // Row stride: width * channels rounded up to the next multiple of 4 bytes.
+    const int bytesPerLine = (width * channels + 3) / 4 * 4;
+    Pix* pix = createPix(data, width, height, channels, bytesPerLine, dpi);
+    if (pix == nullptr) return orient_deg;
+
+    tesseract::TessBaseAPI *api = reinterpret_cast<tesseract::TessBaseAPI*>(m_ptr);
+    api->SetImage(pix);
+    // The call's own return value is not checked; orient_deg keeps its initial
+    // value unless the call overwrites it.
+    api->DetectOrientationScript(&orient_deg, nullptr, nullptr, nullptr);
+    pixDestroy(&pix);
+    return orient_deg;
+}
--- a/huagao/ImageProcess/hg_gpdf.h
+++ b/huagao/ImageProcess/hg_gpdf.h
@ -0,0 +1,85 @@
+/*
+* ====================================================
+
+* 功能：本地图片进行OCR，并生成多页PDF功能
+* 作者：刘丁维
+* 生成时间：2020/3/5
+* 最近修改时间：2020/3/5
+* 版本号：v1.0
+
+* ====================================================
+*/
+
+#ifndef HG_GPDF_H
+#define HG_GPDF_H
+
+#if defined (_WIN32)
+    #if !defined (HG_GPDF_API_BUILD)
+        #define HG_GPDF_API //__declspec(dllexport)
+    #else
+        #define HG_GPDF_API //__declspec(dllimport)
+    #endif
+#else
+    #define HG_GPDF_API
+#endif
+
+class HG_GPDF_API HG_OCR
+{
+public:
+
+    // Recognition mode
+    enum RECOGNITION_MODE
+    {
+        RECOGNITION_OSD,    // document orientation detection mode
+        RECOGNITION_OCR     // character recognition mode
+    };
+
+    // How character recognition results are saved
+    enum SAVE_MODE
+    {
+        SAVE_PDF     = 0x01,    // save the recognition result as PDF
+        SAVE_TXT     = 0x02,    // save the recognition result as TXT
+        SAVE_PDF_TXT = 0x03     // save the recognition result as both PDF and TXT
+    };
+public:
+
+    HG_OCR();
+
+    ~HG_OCR();
+
+    /*
+    * Purpose: initialize the PDF generator
+    * trainFile: [in] path of the training data directory, without a file name and without a trailing '/'
+    * mode: [in] see enum RECOGNITION_MODE; defaults to RECOGNITION_OCR
+    * Returns: 0 on success, anything else on failure; see the printed output for details
+    */
+    int init(const char* trainFile, RECOGNITION_MODE mode = RECOGNITION_OCR);
+
+    /*
+    * Purpose: character recognition, generating the text output
+    * inputFileNames: [in] full paths of the local images to recognize; several paths may be passed at once
+    * numOfFiles: [in] length of inputFileNames
+    * outputFileName: [in] path where the result is saved, including the base file name but without an extension
+    * flag: [in] how the recognition result is saved, see enum SAVE_MODE
+    * Returns: true on success, otherwise failure; see the printed output for details
+    */
+    bool ocr(const char** inputFileNames, int numOfFiles, const char* outputFileName, SAVE_MODE flag = SAVE_PDF);
+
+    /*
+    * Purpose: text orientation detection
+    * data: [in] pointer to the image data
+    * width: [in] image width
+    * height: [in] image height
+    * channels: [in] number of image channels
+    * dpi: [in] image DPI
+    * Returns: angle counted clockwise, one of 0, 90, 180, 270
+    */
+    int orientation(const unsigned char* data, int width, int height, int channels, int dpi);
+
+private:
+    void* m_ptr;    // Opaque engine handle (a tesseract::TessBaseAPI* in hg_gpdf.cpp), deleted by ~HG_OCR. NOTE(review): no copy operations are declared, so a copied HG_OCR would share this handle; confirm instances are never copied.
+};
+
+
+#endif //HG_GPDF_H