code_device/hgdriver/ImageProcess/ImageApplyDiscardBlank.h

326 lines
8.9 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* ====================================================
* 功能:空白页识别。
* 作者:刘丁维
* 生成时间2020/4/21
* 最近修改时间2020/4/21 v1.0
2020/8/12 v1.1 开放setIntensity和setMinArea取消isNormal标识位扩大setIntensity的设置范围从[2, 20]扩大到[1, 100]
2020/8/25 v1.1.1 纸张检测缩进从100像素调整到20像素
2020/10/16 v1.2 添加新接口,能够高效便捷判断图片是否为空白页
2020/10/19 v1.2.1 修复静态空白页判断识别误判的BUG
2021/04/13 v1.3.0 增加标准/票据标识位
2021/08/12 v1.3.1 添加防止不同opencv版本导致计算结果存在差异的代码。
2021/12/14 v1.3.2 重构算法。
2021/12/15 v1.3.3 微调参数。
2021/12/17 v1.3.4 增加背景色接口,实现对纯色纸张的空白页判定
2022/09/07 v1.3.5 修复部分参数传递的BUG
2022/09/19 v1.4 增加模糊处理,提高空白页的过滤能力
2022/09/19 v1.4.1 调整模糊处理步骤
2022/11/18 v1.4.2 调整默认参数
2022/11/29 v1.5 增加纸张杂点忽略功能
2022/12/03 v1.5.1 调整纸张杂点忽略逻辑,避免把细条纹(有效信息)给忽略掉;默认将图像按照灰度图进行识别。
2023/10/12 v1.6 添加新的空白页识别方案。采用JPEG文件大小判断是否为空白页。
2023/10/20 v1.6.1 优化JPEG文件大小判断空白页
2023/10/30 v1.7 调整JPEG文件大小判断空白页的算法接口
2023/11/04 v1.7.1 增加PNG二值化文件大小判断空白页的选项
* 版本号v1.7.1
* ====================================================
*/
#ifndef IMAGE_APPLY_DISCARD_BLANK_H
#define IMAGE_APPLY_DISCARD_BLANK_H
#include "ImageApply.h"
#include <cmath>
#include <stdint.h>
#include <utility>
#include <string>
#include <vector>
#include <iostream>
#include <unordered_map>
#include <map>
/// Read cursor over an in-memory byte buffer with a small ifstream-like
/// interface (eof / fail / get / seekg / tellg).
/// NOTE(review): the constructor is defined elsewhere; presumably the buffer
/// is only referenced, not copied, so it must outlive this object - confirm.
class MyFStream
{
public:
    /// Origin used by seekg().
    enum Seekdir
    {
        Begin, ///< position = offset
        End    ///< position = buffer length + offset
    };

public:
    MyFStream(const char* data, int length);

    /// True once the cursor has reached or passed the end of the buffer.
    bool eof() const
    {
        return m_pos >= m_length;
    }

    /// True when there is no usable buffer (null pointer or non-positive length).
    bool fail() const { return (m_data == nullptr) || (m_length <= 0); }

    // Bulk reads; implemented out of line.
    void read(char* dst, int len);
    void read_reverse(char* dst, int len);

    /// Returns the byte under the cursor and advances by one.
    /// When the cursor is outside the buffer (past the end, negative after a
    /// seekg()/move() with a negative value) or the buffer is null, returns
    /// 0xD9 without moving the cursor. 0xD9 is the value of the JPEG EOI
    /// marker, so a decoder reading past the data sees "end of image".
    unsigned char get()
    {
        // The original only checked the upper bound; a negative cursor or a
        // null buffer would have read out of bounds.
        if (m_data == nullptr || m_pos < 0 || m_pos >= m_length)
            return 0xD9;
        const unsigned char value = static_cast<unsigned char>(m_data[m_pos]);
        ++m_pos;
        return value;
    }

    /// Repositions the cursor relative to the start or the end of the buffer.
    /// The resulting position is not range-checked here; get() validates it.
    void seekg(int offset, Seekdir dir)
    {
        switch (dir)
        {
        case MyFStream::Begin:
            m_pos = offset;
            break;
        case MyFStream::End:
            m_pos = m_length + offset;
            break;
        default:
            break;
        }
    }

    /// Current cursor position.
    int tellg() const { return m_pos; }

    /// Shifts the cursor by `step` bytes (may be negative).
    void move(int step) { m_pos += step; }

private:
    const char* m_data;
    int m_length;
    int m_pos;
};
// PNG helper. Only the interface is visible in this header; the
// implementation lives elsewhere.
// NOTE(review): per the file's changelog (v1.7.1, PNG file-size based blank
// page check) this is presumably used for blank-page detection - confirm.
class png
{
public:
png();
~png();
// Processes `length` bytes starting at `data`; `threshold` defaults to 0.025.
// NOTE(review): the meaning of the bool result and of `threshold` is not
// visible here - confirm against the implementation.
bool read(const char* data, int length, double threshold = 0.025);
};
// Block dimensions. ROW is also the number of rows FREE_VECTOR_LP releases
// for every element.
#define ROW 8
#define COL 8
#define HUFFMAN_DECODE_DEQUE_CACHE 64//unit: bits
// #define _DEBUG_
// #define _DEBUGOUT_
// Releases a vector of 2-D pointers: for every element, delete[] each of its
// ROW rows, then delete[] the element itself, and finally clear the vector.
// The macro expands to two plain statements (a for loop followed by clear())
// and already ends with ';', so it is used without a trailing semicolon and
// must not be used as the unbraced body of an if/else/loop.
#define FREE_VECTOR_LP(vectorName) \
for(auto item : vectorName){ \
for(int i=0;i<ROW;i++)\
delete [] item[i];\
delete [] item; \
}\
vectorName.clear();
// Releases a 2-D pointer: delete[] the first `row` rows of lpName, then
// delete[] lpName itself. `row` is pasted unparenthesized into `i<row`.
// Same multi-statement caveat as FREE_VECTOR_LP.
#define FREE_LP_2(lpName,row) \
for(int i=0;i<row;i++){\
delete [] lpName[i];\
}\
delete [] lpName;
// JPEG segment types (marker codes)
enum JPEGPType {
SOF0 = 0xC0, // start of frame
SOF1 = 0xC1, // start of frame
SOF2 = 0xC2, // start of frame
DHT = 0xC4, // Huffman table
SOI = 0xD8, // start of file
EOI = 0xD9, // end of file
SOS = 0xDA, // start of scan
DQT = 0xDB, // define quantization table
DRI = 0xDD, // define restart interval
APP0 = 0xE0, // defines interchange format and image identification information
APP1 = 0xE1, // defines interchange format and image identification information
APP2 = 0xE2, // defines interchange format and image identification information
COM = 0xFE // comment
};
// Converts a one-dimensional array into a two-dimensional array.
// NOTE(review): the expected length of originArray and who frees the returned
// double** are not visible in this header - presumably ROW x COL and the
// caller; confirm against the implementation.
double** UnZigZag(int* originArray);
// One pixel as three 8-bit channels, stored in red, green, blue order.
struct RGB {
uint8_t red;
uint8_t green;
uint8_t blue;
};
// SOS (start of scan) segment
class JPEGScan {
public:
// Maps a component id to a <DC, AC> pair.
// NOTE(review): presumably the DC/AC Huffman table ids selected for that
// component - confirm in Init().
std::map<uint8_t, std::pair<uint8_t, uint8_t>> componentHuffmanMap;
// Initializes from `file`; `len` is presumably the segment length - confirm.
// Implemented elsewhere.
bool Init(MyFStream& file, uint16_t len);
};
// APP segment
/// Information taken from a JPEG APP segment.
class JPEGInfo {
public:
    /// Version field. Defaults to 0 so a default-constructed object never
    /// exposes an indeterminate value.
    /// NOTE(review): the exact encoding of the version is set by the reader,
    /// which is not visible here.
    uint16_t version = 0;
};
// DHT (Huffman table) segment
class JPEGHuffmanCode {
public:
using iterator = std::map<uint16_t, std::pair<uint8_t, uint8_t>>::iterator;
// Maps code -> <bit, weight>.
// NOTE(review): "bit" is presumably the code length in bits - confirm in Init().
std::map<uint16_t, std::pair<uint8_t, uint8_t>> table;
// Initializes the Huffman table from `file`; implemented elsewhere.
bool Init(MyFStream& file, uint16_t len);
// Looks up `code` with `bit`; returns true when found, false otherwise.
// NOTE(review): `it` is presumably set to the matching entry on success - confirm.
bool findKey(const uint16_t& code, const uint8_t& bit, iterator& it);
};
// DQT segment
// Quantization table
class JPEGQuality {
public:
    // Scalar fields default to 0 so an object whose Init() was never run (or
    // failed early) does not carry indeterminate values.
    uint8_t precision = 0;
    uint8_t id = 0;
    // Table entries; filled by Init().
    std::vector<uint16_t> table;
    // Initializes from `file`; `len` is presumably the segment length - confirm.
    // Implemented elsewhere.
    bool Init(MyFStream& file, uint16_t len);
};
// SOF segment
/// One color component described by the SOF segment.
class JPEGComponent {
public:
    // All fields default to 0 so a component whose Init() was never run (or
    // failed early) does not carry indeterminate values.

    // 1 = Y, 2 = Cb, 3 = Cr, 4 = I, 5 = Q
    uint8_t colorId = 0;
    uint8_t h_samp_factor = 0;
    uint8_t v_samp_factor = 0;
    // NOTE(review): presumably the id of the quantization table used by this
    // component - confirm in Init().
    uint8_t qualityId = 0;
    // Initializes from `file`; implemented elsewhere.
    bool Init(MyFStream& file, uint16_t len);
};
/// State of the in-memory JPEG decoder.
/// The object owns raw heap memory (DCTAndIDCTArray and the blocks stored in
/// `rgb`) that is released in the destructor, so it is non-copyable: an
/// implicit copy would make two objects free the same pointers.
class JPEGData {
    int max_h_samp_factor; // MCU, row direction
    int max_v_samp_factor; // MCU, column direction
    int width;
    int height;
    int precision;
    bool isYUV411 = false;
    bool isYUV422 = false;
    bool isYUV111 = false;
    uint8_t curDRI = 0;         // current DC-reset marker id; only the ones digit is kept to simplify computation
    uint16_t resetInterval = 0; // unit: MCU
    int preDCValue[3] = { 0 };  // used for DC differential correction
    // quantization tables
    std::vector<JPEGQuality> quality;
    // Huffman code tables
    std::vector<JPEGHuffmanCode> dc_huffman;
    std::vector<JPEGHuffmanCode> ac_huffman;
    // one entry per color component
    std::vector<JPEGComponent> component;
    JPEGScan scan;
    //vector<int**> deHuffman;
    // NOTE(review): the destructor does not release the blocks in ycbcr;
    // verify they are freed where they are produced.
    std::vector<double**> ycbcr;
    std::vector<RGB**> rgb;
    double** DCTAndIDCTArray = nullptr; // allocated in the constructor
    int pos = 0;
    // NOTE(review): inside this class the name EOI refers to this flag, not
    // to the JPEGPType::EOI enumerator.
    bool EOI{ false };
public:
    // Initialized to 0 so they are never read while indeterminate.
    double m_threshold1 = 0.0;
    double m_threshold2 = 0.0;
    double m_res = 0.0;
public:
    JPEGData() :
        max_h_samp_factor(0),
        max_v_samp_factor(0),
        width(0),
        height(0),
        precision(0) {
        DCTAndIDCTArray = createDCTAndIDCTArray(ROW);
    }
    // Copying would lead to a double free in the destructor.
    JPEGData(const JPEGData&) = delete;
    JPEGData& operator=(const JPEGData&) = delete;
    ~JPEGData() {
        // NOTE(review): only ROW - 1 rows are released although the table is
        // created with createDCTAndIDCTArray(ROW); if that function allocates
        // ROW rows the last one leaks. Left unchanged because the allocator
        // is not visible here - confirm and change to ROW if so.
        FREE_LP_2(DCTAndIDCTArray, ROW - 1)
        // FREE_LP_2(DCTArray,ROW-1)
        // FREE_LP_2(IDCTArray,ROW-1)
        FREE_VECTOR_LP(rgb)
    }
    bool readJPEG(const char* data, int length);
    int getWidth() const { return width; }
    int getHeight() const { return height; }
    // Returns a copy of the vector of block pointers; the blocks themselves
    // stay owned by this object and are freed in its destructor.
    std::vector<RGB**> getRGB() const { return rgb; }
    int getMaxHSampFactor() const { return max_h_samp_factor; }
    int getMaxVSampFactor() const { return max_v_samp_factor; }
    double** createDCTAndIDCTArray(int row);
    //double** createIDCTArray(int row);
    void DCT(double** originMatrix);
    void IDCT(double** originMatrix);
protected:
    bool readSOF(MyFStream& file, uint16_t len);
    bool readData(MyFStream& file);
    bool huffmanDecode(MyFStream& file);
    void deQuality(double** originMatrix, int qualityID);
    // alternate-row sign correction
    void PAndNCorrect(double** originMatrix);
    RGB** YCbCrToRGB(const int* YUV);
    // Marker check: whether the data has ended and whether the DC correction
    // value must be reset; returns the value to append.
    std::string FlagCkeck(MyFStream& file, int byteInfo);
    uint16_t ReadByte(MyFStream& file, int len);
    uint16_t findHuffmanCodeByBit(MyFStream& file, int& length, int& pos, std::string& deque, int curValue, int& curValLen);
};
/// Blank-page detection filter.
class GIMGPROC_LIBRARY_API CImageApplyDiscardBlank : public CImageApply
{
public:
/// Kind of encoded file passed to the file-size based apply() overload.
/// NOTE(review): "PNG_BINARAY" is a misspelling of "binary"; it is part of
/// the public interface, so it is kept.
enum FileType
{
JPEG_COLOR, // 0: JPG, color
JPEG_GRAY, // 1: JPG, grayscale
PNG_COLOR, // 2: PNG, color
PNG_GRAY, // 3: PNG, grayscale
PNG_BINARAY // 4: PNG, binary (two-level) image
};
/// <summary>
/// Blank-page detection.
/// </summary>
/// <param name="threshold">Contour threshold. Range [0, 255]</param>
/// <param name="edge">Edge inset. Range [0, +inf)</param>
/// <param name="devTh">Handwriting decision threshold. The lower it is, the more readily marks are judged to be present. Range [0, +inf)</param>
/// <param name="meanTh">Document background threshold. A page whose background is below this value is treated as non-blank outright. Range [0, 255]</param>
/// <param name="dilate">Ignore paper speckles. Has no effect when <= 1; the larger the value, the more readily speckles are ignored. Range [1, +inf)</param>
CImageApplyDiscardBlank(double threshold = 40, int edge = 50, double devTh = 30, double meanTh = 200, int dilate = 11);
virtual ~CImageApplyDiscardBlank(void);
// NOTE(review): these two presumably override the CImageApply interface,
// which is declared in ImageApply.h and not visible here.
virtual void apply(cv::Mat& pDib, int side);
virtual void apply(std::vector<cv::Mat>& mats, bool isTwoSide);
/// <summary>
/// Blank-page detection based on the image content.
/// NOTE(review): dilate defaults to 3 here but to 11 in the constructor - confirm this is intended.
/// </summary>
/// <param name="pDib">Source image</param>
/// <param name="threshold">Contour threshold</param>
/// <param name="edge">Edge inset</param>
/// <param name="devTh">Handwriting decision threshold. The lower it is, the more readily marks are judged to be present.</param>
/// <param name="meanTh">Document background threshold. A page whose background is below this value is treated as non-blank outright.</param>
/// <param name="dilate">Ignore paper speckles. Has no effect when <= 1; the larger the value, the more readily speckles are ignored.</param>
/// <returns>true for a blank page, false for a non-blank page</returns>
static bool apply(const cv::Mat& pDib, double threshold = 40, int edge = 50, double devTh = 30, double meanTh = 200, int dilate = 3);
/// <summary>
/// Blank-page detection from an encoded file (size and, optionally, its data).
/// </summary>
/// <param name="fileSize">Size of the JPG file</param>
/// <param name="imageSize">Image size</param>
/// <param name="type">0 = JPG + color, 1 = JPG + grayscale, 2 = PNG + color, 3 = PNG + grayscale, 4 = PNG + binary image</param>
/// <param name="threshold">Detection sensitivity threshold</param>
/// <param name="data">Pointer to the start of the file data</param>
/// <returns>true for a blank page, false for a non-blank page</returns>
static bool apply(int fileSize, const cv::Size& imageSize, FileType type, double threshold, const char* data = nullptr);
private:
double m_threshold;
int m_edge;
double m_devTh;
double m_meanTh;
int m_dilate;
};
#endif // !IMAGE_APPLY_DISCARD_BLANK_H