From ac12acc233773076e904fa087580e1718ebfbc0c Mon Sep 17 00:00:00 2001 From: lovelyyoung <1002639516@qq.com> Date: Sat, 15 Aug 2020 16:06:44 +0800 Subject: [PATCH] =?UTF-8?q?1.=E4=BF=AE=E5=A4=8DOCR=20300DPI=20bug=202.?= =?UTF-8?q?=E4=BC=98=E5=8C=96=E8=B7=B3=E8=BF=87=E7=A9=BA=E7=99=BD=E9=A1=B5?= =?UTF-8?q?=E7=AE=97=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ImageProcess/ImageApplyDiscardBlank.cpp | 37 ++-- huagao/ImageProcess/ImageApplyDiscardBlank.h | 23 ++- huagao/ImageProcess/ImageApplyRotation.cpp | 2 +- huagao/ImageProcess/hg_gpdf.cpp | 163 ++++++++++++++++++ huagao/ImageProcess/hg_gpdf.h | 85 +++++++++ 5 files changed, 285 insertions(+), 25 deletions(-) create mode 100644 huagao/ImageProcess/hg_gpdf.cpp create mode 100644 huagao/ImageProcess/hg_gpdf.h diff --git a/huagao/ImageProcess/ImageApplyDiscardBlank.cpp b/huagao/ImageProcess/ImageApplyDiscardBlank.cpp index 7d28c26f..02052262 100644 --- a/huagao/ImageProcess/ImageApplyDiscardBlank.cpp +++ b/huagao/ImageProcess/ImageApplyDiscardBlank.cpp @@ -1,9 +1,8 @@ #include "ImageApplyDiscardBlank.h" #include "ImageProcess_Public.h" - -CImageApplyDiscardBlank::CImageApplyDiscardBlank(bool isnormal) +#include "filetools.h" +CImageApplyDiscardBlank::CImageApplyDiscardBlank() : m_res(false) - , isNormalDiscard(isnormal) , dSize(200) , devTh(15, 15, 15, 15) { @@ -88,15 +87,15 @@ int CImageApplyDiscardBlank::processRectR(const cv::Mat& image, cv::RotatedRect& bool CImageApplyDiscardBlank::scalar_LE(const cv::Scalar& val1, const cv::Scalar& val2) { - for(int i = 0; i < 3; i++) - if(val1[i] > val2[i]) + for (int i = 0; i < 3; i++) + if (val1[i] > val2[i]) return false; return true; } void CImageApplyDiscardBlank::setIntensity(int val) { - val = cv::max(cv::min(20, val), 2); + val = cv::max(cv::min(100, val), 1); devTh = cv::Scalar(val, val, val, val); } @@ -117,7 +116,7 @@ cv::Mat CImageApplyDiscardBlank::getRoiMat(const cv::Mat& image) return image(inRect); } -void CImageApplyDiscardBlank::apply(cv::Mat& pDib,int side) +void CImageApplyDiscardBlank::apply(cv::Mat& pDib, int side) { #ifdef LOG FileTools::write_log("imgprc.txt", "enter CImageApplyDiscardBlank apply"); @@ -131,9 +130,6 @@ void CImageApplyDiscardBlank::apply(cv::Mat& pDib,int side) return; } - setIntensity(isNormalDiscard ? 8 : 20); - setMinArea(isNormalDiscard ? 200 : 300); - cv::Scalar mean; cv::Scalar dev; cv::Mat image = getRoiMat(pDib); @@ -150,7 +146,7 @@ void CImageApplyDiscardBlank::apply(cv::Mat& pDib,int side) { m_res = false; #ifdef LOG - FileTools::write_log("imgprc.txt", "exit CImageApplyDiscardBlank apply"); + FileTools::write_log("D:\\imgprc.txt", "CImageApplyDiscardBlank blank"); #endif // LOG return; } @@ -166,14 +162,13 @@ void CImageApplyDiscardBlank::apply(cv::Mat& pDib,int side) void CImageApplyDiscardBlank::apply(std::vector& mats, bool isTwoSide) { - if (mats.empty()) return; - - if (!mats[0].empty()) { - apply(mats[0], 0); + (void)isTwoSide; + int i = 0; + for (cv::Mat& var : mats) { + if (i != 0 && isTwoSide == false) + break; + if (!var.empty()) + apply(var, 0); + i++; } - - if (isTwoSide && mats.size() > 1) { - if (!mats[1].empty()) - apply(mats[1], 1); - } -} \ No newline at end of file +} diff --git a/huagao/ImageProcess/ImageApplyDiscardBlank.h b/huagao/ImageProcess/ImageApplyDiscardBlank.h index 70394911..f1822dcc 100644 --- a/huagao/ImageProcess/ImageApplyDiscardBlank.h +++ b/huagao/ImageProcess/ImageApplyDiscardBlank.h @@ -1,3 +1,16 @@ +/* + * ==================================================== + + * 功能:空白页识别。 + * 作者:刘丁维 + * 生成时间:2020/4/21 + * 最近修改时间:2020/4/21 v1.0 + 2020/8/12 v1.1 开放setIntensity和setMinArea;取消isNormal标识位;扩大setIntensity的设置范围,从[2, 20]扩大到[1, 100] + * 版本号:v1.1 + + * ==================================================== + */ + #ifndef IMAGE_APPLY_DISCARD_BLANK_H #define IMAGE_APPLY_DISCARD_BLANK_H @@ -6,19 +19,23 @@ class CImageApplyDiscardBlank : public CImageApply { public: - CImageApplyDiscardBlank(bool isnormal = true); + /* + * isnormal [in]:true标准模式,false为票据复写纸模式 + * */ + CImageApplyDiscardBlank(); virtual ~CImageApplyDiscardBlank(void); - virtual void apply(cv::Mat& pDib,int side); + virtual void apply(cv::Mat& pDib,int side); virtual void apply(std::vector& mats, bool isTwoSide); -private: void setIntensity(int val); void setMinArea(int val) { dSize = val; } +private: + int processRectR(const cv::Mat& image, cv::RotatedRect& rotatedRect, std::vector& maxContour, double scale, double thresh, int blobAreaSize); diff --git a/huagao/ImageProcess/ImageApplyRotation.cpp b/huagao/ImageProcess/ImageApplyRotation.cpp index 6b685614..a5554f2b 100644 --- a/huagao/ImageProcess/ImageApplyRotation.cpp +++ b/huagao/ImageProcess/ImageApplyRotation.cpp @@ -50,7 +50,7 @@ void CImageApplyRotation::apply(cv::Mat & pDib, int side) if (m_dpi != 200) { double scale = 200 / static_cast(m_dpi); - int new_w = (pDib.cols * scale + 3) / 4 * 4; + int new_w = static_cast(pDib.cols * scale) / 4 * 4; int new_h = pDib.rows * scale; cv::resize(pDib, temp, cv::Size(new_w, new_h)); } diff --git a/huagao/ImageProcess/hg_gpdf.cpp b/huagao/ImageProcess/hg_gpdf.cpp new file mode 100644 index 00000000..bf0546c3 --- /dev/null +++ b/huagao/ImageProcess/hg_gpdf.cpp @@ -0,0 +1,163 @@ +#include "hg_gpdf.h" + +#if defined(WIN32) +#include "baseapi.h" +#include "allheaders.h" +#include "renderer.h" +#else +#include +#include +#include +#endif + +//#define USE_QT +#ifdef USE_QT +#include +#include +#endif + +Pix* createPix(const unsigned char * imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line, int dpi) +{ + int bpp = bytes_per_pixel * 8; + if (bpp == 0) bpp = 1; + Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp); + pixSetXRes(pix, dpi); + pixSetYRes(pix, dpi); + l_uint32* data = pixGetData(pix); + int wpl = pixGetWpl(pix); + switch (bpp) + { + case 1: + for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) + for (int x = 0; x < width; ++x) + if (imagedata[x / 8] & (0x80 >> (x % 8))) + CLEAR_DATA_BIT(data, x); + else + SET_DATA_BIT(data, x); + break; + + case 8: + // Greyscale just copies the bytes in the right order. + for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) + for (int x = 0; x < width; ++x) + SET_DATA_BYTE(data, x, imagedata[x]); + break; + + case 24: + // Put the colors in the correct places in the line buffer. + for (int y = 0; y < height; ++y, imagedata += bytes_per_line) + for (int x = 0; x < width; ++x, ++data) { + SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]); + SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]); + SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]); + } + break; + + case 32: + // Maintain byte order consistency across different endianness. + for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) + for (int x = 0; x < width; ++x) + data[x] = static_cast((imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) | + (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3]); + break; + + default: + break; + } + + pix->informat = bytes_per_pixel == 1 ? 1 : 2; + if (bytes_per_pixel == 1) + { + PIXCMAP* colormap = pixcmapCreate(8); + LEPT_FREE(colormap->array); + colormap->array = reinterpret_cast(LEPT_CALLOC(256, sizeof(RGBA_QUAD))); + colormap->n = 256; + colormap->nalloc = 256; + colormap->depth = 8; + l_uint8* ptr = reinterpret_cast(colormap->array); + for (int i = 0; i < 256; i++) + ptr[i * 4 + 0] = ptr[i * 4 + 1] = ptr[i * 4 + 2] = ptr[i * 4 + 3] = static_cast(i); + pixSetColormap(pix, colormap); + } + pixSetXRes(pix, 200); + pixSetYRes(pix, 200); + return pix; +} + +HG_OCR::HG_OCR() + : m_ptr(nullptr) +{ +} + +HG_OCR::~HG_OCR() +{ + if (m_ptr) + { + reinterpret_cast(m_ptr)->End(); + delete reinterpret_cast(m_ptr); + } +} + +int HG_OCR::init(const char* trainFile, RECOGNITION_MODE mode) +{ + if (m_ptr) delete reinterpret_cast(m_ptr); + tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI(); + api->SetPageSegMode(tesseract::PSM_AUTO_OSD); + m_ptr = reinterpret_cast(api); + return api->Init(trainFile, mode == RECOGNITION_OCR ? "chi_sim" : "osd"); +} + +bool HG_OCR::ocr(const char** inputFileNames, int numOfFiles, const char* outputFileName, SAVE_MODE flag) +{ + if (m_ptr == nullptr) return false; + if (inputFileNames == nullptr || numOfFiles == 0) return false; + + tesseract::PointerVector renderers; + tesseract::TessBaseAPI *api = reinterpret_cast(m_ptr); + if (flag & SAVE_PDF) + renderers.push_back(new tesseract::TessPDFRenderer(outputFileName, api->GetDatapath(), false)); + if (flag & SAVE_TXT) + renderers.push_back(new tesseract::TessTextRenderer(outputFileName)); + +#ifdef USE_QT + QTime timer; + timer.start(); +#endif + + int numOfRenderer = renderers.length(); + for (int i = 0; i < numOfRenderer; i++) + renderers[i]->BeginDocument(""); + + for (int i = 0; i < numOfFiles; i++) + { + Pix* img = pixRead(inputFileNames[i]); + api->SetInputName(outputFileName); + api->SetImage(img); + api->Recognize(nullptr); + for (int i = 0; i < numOfRenderer; i++) + renderers[i]->AddImage(api); + pixDestroy(&img); + } + + for (int i = 0; i < numOfRenderer; i++) + renderers[i]->EndDocument(); + + renderers.clear(); + +#ifdef USE_QT + qDebug() << timer.elapsed(); +#endif + + return true; +} + +int HG_OCR::orientation(const unsigned char *data, int width, int height, int channels, int dpi) +{ + Pix* pix = createPix(data, width, height, channels, (width * channels + 3) / 4 * 4, dpi); + tesseract::TessBaseAPI *api = reinterpret_cast(m_ptr); + api->SetImage(pix); + int orient_deg = 1; + api->DetectOrientationScript(&orient_deg, nullptr, nullptr, nullptr); + pixDestroy(&pix); + return orient_deg; +} diff --git a/huagao/ImageProcess/hg_gpdf.h b/huagao/ImageProcess/hg_gpdf.h new file mode 100644 index 00000000..4f77d513 --- /dev/null +++ b/huagao/ImageProcess/hg_gpdf.h @@ -0,0 +1,85 @@ +/* +* ==================================================== + +* 功能:本地图片进行OCR,并生成多页PDF功能 +* 作者:刘丁维 +* 生成时间:2020/3/5 +* 最近修改时间:2020/3/5 +* 版本号:v1.0 + +* ==================================================== +*/ + +#ifndef HG_GPDF_H +#define HG_GPDF_H + +#if defined (_WIN32) + #if !defined (HG_GPDF_API_BUILD) + #define HG_GPDF_API //__declspec(dllexport) + #else + #define HG_GPDF_API //__declspec(dllimport) + #endif +#else + #define HG_GPDF_API +#endif + +class HG_GPDF_API HG_OCR +{ +public: + + //识别模式 + enum RECOGNITION_MODE + { + RECOGNITION_OSD, //文稿方向识别模式 + RECOGNITION_OCR //字符识别模式 + }; + + //字符识别保存模式 + enum SAVE_MODE + { + SAVE_PDF = 0x01, //PDF保存字符识别结果 + SAVE_TXT = 0x02, //TXT保存字符识别结果 + SAVE_PDF_TXT = 0x03 //同时采用PDF和TXT保存字符识别结果 + }; +public: + + HG_OCR(); + + ~HG_OCR(); + + /* + * 函数功能:初始化PDF生成器 + * trainFile:[in] 训练库文件路径,不含文件名,末尾无需'/'结尾 + * mode:[in] 具体选项参照 enum RECOGNITION_MODE,默认值为RECOGNITION_OCR + * 返回值:0为成功,否则为异常。异常查看打印输出 + */ + int init(const char* trainFile, RECOGNITION_MODE mode = RECOGNITION_OCR); + + /* + * 函数功能:字符识别,并生成文本 + * inputFileNames:[in] 本地待识别图片完整路径,可同时输入多个图片路径 + * numOfFiles:[in] inputFileNames长度 + * numOfFiles:[in] inputFileNames长度 + * outputFileName:[in] 识别结果保存路径,包含文件基础名字,不含文件后缀名 + * flag:[in] 字符识别保存模式,参照 enum SAVE_MODE + * 返回值:true为成功,否则为异常。异常查看打印输出 + */ + bool ocr(const char** inputFileNames, int numOfFiles, const char* outputFileName, SAVE_MODE flag = SAVE_PDF); + + /* + * 函数功能:文本方向识别 + * data:[in] 图像数据指针 + * width:[in] 图像宽度 + * height:[in] 图像高度 + * channels:[in] 图像通道 + * dpi:[in] 图片DPI + * 返回值:以顺时针进行计数,输出0,90,180,270四种结果 + */ + int orientation(const unsigned char* data, int width, int height, int channels, int dpi); + +private: + void* m_ptr; +}; + + +#endif //HG_GPDF_H