/*
 * ====================================================
 *
 * 功能:空白页识别。
 * 作者:刘丁维
 * 生成时间:2020/4/21
 * 最近修改时间:2020/4/21 v1.0
 *               2020/8/12 v1.1 开放setIntensity和setMinArea;取消isNormal标识位;扩大setIntensity的设置范围,从[2, 20]扩大到[1, 100]
 *               2020/8/25 v1.1.1 纸张检测缩进,从100像素调整到20像素
 *               2020/10/16 v1.2 添加新接口,能够高效便捷判断图片是否为空白页
 *               2020/10/19 v1.2.1 修复静态空白页判断识别误判的BUG
 *               2021/04/13 v1.3.0 增加标准/票据标识位
 *               2021/08/12 v1.3.1 添加防止不同opencv版本导致计算结果存在差异的代码。
 *               2021/12/14 v1.3.2 重构算法。
 *               2021/12/15 v1.3.3 微调参数。
 *               2021/12/17 v1.3.4 增加背景色接口,实现对纯色纸张的空白页判定
 *               2022/09/07 v1.3.5 修复部分参数传递的BUG
 *               2022/09/19 v1.4 增加模糊处理,提高空白页的过滤能力
 *               2022/09/19 v1.4.1 调整模糊处理步骤
 *               2022/11/18 v1.4.2 调整默认参数
 *               2022/11/29 v1.5 增加纸张杂点忽略功能
 *               2022/12/03 v1.5.1 调整纸张杂点忽略逻辑,避免把细条纹(有效信息)给忽略掉;默认将图像按照灰度图进行识别。
 *               2023/10/12 v1.6 添加新的空白页识别方案。采用JPEG文件大小判断是否为空白页。
 *               2023/10/20 v1.6.1 优化JPEG文件大小判断空白页
 *               2023/10/30 v1.7 调整JPEG文件大小判断空白页的算法接口
 *               2023/11/04 v1.7.1 增加PNG二值化文件大小判断空白页的选项
 * 版本号:v1.7.1
 *
 * ====================================================
 */

#ifndef IMAGE_APPLY_DISCARD_BLANK_H
#define IMAGE_APPLY_DISCARD_BLANK_H

#include "ImageApply.h"
#include <cmath>
#include <stdint.h>
#include <utility>
#include <string>
#include <vector>
#include <iostream>
#include <unordered_map>
#include <map>

/// Read cursor over an in-memory byte buffer with a small, ifstream-like
/// interface (eof/fail/read/get/seekg/tellg).
///
/// The buffer is only referenced through a `const char*`; this class declares
/// no destructor, so it never releases the buffer itself.
class MyFStream
{
public:
    /// Origin used by seekg().
    enum Seekdir
    {
        Begin, ///< Offset is applied from position 0.
        End    ///< Offset is applied from position m_length.
    };

public:
    /// Wraps `length` bytes starting at `data`. Defined out of line.
    MyFStream(const char* data, int length);

    /// True when the cursor is outside the readable range [0, m_length).
    /// A negative cursor (reachable through seekg()/move()) is reported as
    /// end-of-stream as well, so loops guarded by eof() cannot spin on it.
    bool eof() const
    {
        return m_pos < 0 || m_pos >= m_length;
    }

    /// True when there is no usable buffer (null pointer or non-positive length).
    bool fail() const { return (m_data == nullptr) || (m_length <= 0); }

    /// Defined out of line.
    /// NOTE(review): presumably copies `len` bytes from the cursor into `dst`
    /// and advances the cursor — confirm against the implementation.
    void read(char* dst, int len);

    /// Defined out of line.
    /// NOTE(review): presumably like read() but stores the bytes in reverse
    /// order — confirm against the implementation.
    void read_reverse(char* dst, int len);

    /// Returns the byte under the cursor and advances by one.
    ///
    /// When no byte can be read (cursor out of range or null buffer) the
    /// cursor is left untouched and 0xD9 is returned; 0xD9 is the value of the
    /// JPEG end-of-image marker byte (JPEGPType::EOI).
    unsigned char get()
    {
        if (m_data == nullptr || m_pos < 0 || m_pos >= m_length)
            return 0xD9;

        return static_cast<unsigned char>(m_data[m_pos++]);
    }

    /// Moves the cursor to `offset` relative to `dir`.
    /// The resulting position is not range-checked here; get() and eof()
    /// tolerate an out-of-range cursor.
    void seekg(int offset, Seekdir dir)
    {
        switch (dir)
        {
        case MyFStream::Begin:
            m_pos = offset;
            break;
        case MyFStream::End:
            m_pos = m_length + offset;
            break;
        default:
            break;
        }
    }

    /// Current cursor position, in bytes from the start of the buffer.
    int tellg() const { return m_pos; }

    /// Shifts the cursor by `step` bytes (negative values move backwards).
    void move(int step) { m_pos += step; }

private:
    const char* m_data; // Borrowed buffer.
    int m_length;       // Buffer size in bytes.
    int m_pos;          // Cursor position.
};

/// PNG reader used by the file-based blank-page check.
/// All members are defined out of line.
class png
{
public:
    png();

    ~png();

    /// Examines `length` bytes of file data starting at `data`.
    /// NOTE(review): the meaning of the return value and of `threshold`
    /// (default 0.025) is not visible in this header — presumably `true`
    /// means the image is considered blank; confirm against the implementation.
    bool read(const char* data, int length, double threshold = 0.025);
};

// Row count of the 2-D tables released by FREE_VECTOR_LP.
#define ROW 8
#define COL 8
// Unit: bits.
#define HUFFMAN_DECODE_DEQUE_CACHE 64
// #define _DEBUG_
// #define _DEBUGOUT_

// Releases every element of a std::vector of row-pointer tables and then
// empties the vector. Each element is treated as a new[]-allocated table of
// ROW rows, each row itself allocated with new[].
//
// The expansion is a single braced statement, so it is safe directly under an
// unbraced `if`, and it may be invoked with or without a trailing semicolon.
#define FREE_VECTOR_LP(vectorName) \
    { \
        for (auto item : (vectorName)) { \
            for (int i = 0; i < ROW; i++) \
                delete[] item[i]; \
            delete[] item; \
        } \
        (vectorName).clear(); \
    }

// Releases a two-dimensional pointer table: the first `row` rows (each
// allocated with new[]) and then the table itself.
//
// Same expansion rules as FREE_VECTOR_LP: one braced statement, trailing
// semicolon optional. `row` is parenthesized so expressions such as `n - 1`
// are evaluated as a whole.
#define FREE_LP_2(lpName, row) \
    { \
        for (int i = 0; i < (row); i++) { \
            delete[] (lpName)[i]; \
        } \
        delete[] (lpName); \
    }

// JPEG segment (marker) types. Each value is the marker code byte that
// follows the 0xFF prefix in the file.
enum JPEGPType {
    SOF0 = 0xC0, // Start of frame (baseline DCT)
    SOF1 = 0xC1, // Start of frame (extended sequential DCT)
    SOF2 = 0xC2, // Start of frame (progressive DCT)
    DHT = 0xC4,  // Huffman table definition
    SOI = 0xD8,  // Start of image (file header)
    EOI = 0xD9,  // End of image (file trailer)
    SOS = 0xDA,  // Start of scan
    DQT = 0xDB,  // Quantization table definition
    DRI = 0xDD,  // Restart interval definition
    APP0 = 0xE0, // Application segment 0 (interchange format and image identification)
    APP1 = 0xE1, // Application segment 1
    APP2 = 0xE2, // Application segment 2
    COM = 0xFE   // Comment
};

// Converts a one-dimensional array into a two-dimensional array.
// NOTE(review): the required length of `originArray`, the dimensions of the
// result (presumably ROW x COL) and who releases the returned table are
// decided by the out-of-line definition — confirm before use.
double** UnZigZag(int* originArray);

// One pixel with three 8-bit channels.
struct RGB {
    uint8_t red;
    uint8_t green;
    uint8_t blue;
};

//SOS
|
||
class JPEGScan {
|
||
public:
|
||
//componentId,<DC,AC>
|
||
std::map<uint8_t, std::pair<uint8_t, uint8_t>> componentHuffmanMap;
|
||
bool Init(MyFStream& file, uint16_t len);
|
||
};
|
||
// APP segment.
class JPEGInfo {
public:
    // Zero-initialized so a default-constructed object never exposes an
    // indeterminate value.
    uint16_t version = 0;
};

//DHT
|
||
class JPEGHuffmanCode {
|
||
public:
|
||
using iterator = std::map<uint16_t, std::pair<uint8_t, uint8_t>>::iterator;
|
||
//<code,<bit,weight>
|
||
std::map<uint16_t, std::pair<uint8_t, uint8_t>> table;
|
||
//init huffman table
|
||
bool Init(MyFStream& file, uint16_t len);
|
||
//find-true not find-false
|
||
bool findKey(const uint16_t& code, const uint8_t& bit, iterator& it);
|
||
};
|
||
//DQT
|
||
//quality table
|
||
class JPEGQuality {
|
||
public:
|
||
uint8_t precision;
|
||
uint8_t id;
|
||
std::vector<uint16_t> table;
|
||
bool Init(MyFStream& file, uint16_t len);
|
||
};
|
||
//SOF segment
|
||
class JPEGComponent {
|
||
public:
|
||
//1=Y, 2=Cb, 3=Cr, 4=I, 5=Q
|
||
uint8_t colorId;
|
||
uint8_t h_samp_factor;
|
||
uint8_t v_samp_factor;
|
||
uint8_t qualityId;
|
||
bool Init(MyFStream& file, uint16_t len);
|
||
};
|
||
class JPEGData {
|
||
int max_h_samp_factor;//行MCU
|
||
int max_v_samp_factor;//列MCU
|
||
int width;
|
||
int height;
|
||
int precision;
|
||
bool isYUV411 = false;
|
||
bool isYUV422 = false;
|
||
bool isYUV111 = false;
|
||
uint8_t curDRI = 0;//当前重置直流分量标识,这里只取个位方便计算
|
||
uint16_t resetInterval = 0;//单位是MCU
|
||
int preDCValue[3] = { 0 }; //用于直流差分矫正
|
||
//量化表
|
||
std::vector<JPEGQuality> quality;
|
||
//huffman码表
|
||
std::vector<JPEGHuffmanCode> dc_huffman;
|
||
std::vector<JPEGHuffmanCode> ac_huffman;
|
||
//component每个颜色分量
|
||
std::vector<JPEGComponent> component;
|
||
JPEGScan scan;
|
||
//vector<int**> deHuffman;
|
||
std::vector<double**> ycbcr;
|
||
std::vector<RGB**> rgb;
|
||
double** DCTAndIDCTArray;
|
||
int pos;
|
||
bool EOI{ false };
|
||
|
||
public:
|
||
double m_threshold1, m_threshold2, m_res;
|
||
public:
|
||
JPEGData() :
|
||
max_h_samp_factor(0),
|
||
max_v_samp_factor(0),
|
||
width(0),
|
||
height(0),
|
||
precision(0) {
|
||
DCTAndIDCTArray = createDCTAndIDCTArray(ROW);
|
||
}
|
||
~JPEGData() {
|
||
FREE_LP_2(DCTAndIDCTArray, ROW - 1)
|
||
// FREE_LP_2(DCTArray,ROW-1)
|
||
// FREE_LP_2(IDCTArray,ROW-1)
|
||
FREE_VECTOR_LP(rgb)
|
||
}
|
||
|
||
bool readJPEG(const char* data, int length);
|
||
int getWidth() const { return width; }
|
||
int getHeight() const { return height; }
|
||
std::vector<RGB**> getRGB() const { return rgb; }
|
||
int getMaxHSampFactor() const { return max_h_samp_factor; }
|
||
int getMaxVSampFactor() const { return max_v_samp_factor; }
|
||
double** createDCTAndIDCTArray(int row);
|
||
//double** createIDCTArray(int row);
|
||
void DCT(double** originMatrix);
|
||
void IDCT(double** originMatrix);
|
||
protected:
|
||
bool readSOF(MyFStream& file, uint16_t len);
|
||
bool readData(MyFStream& file);
|
||
bool huffmanDecode(MyFStream& file);
|
||
void deQuality(double** originMatrix, int qualityID);
|
||
//隔行正负纠正
|
||
void PAndNCorrect(double** originMatrix);
|
||
RGB** YCbCrToRGB(const int* YUV);
|
||
//标记位检查 是否结束,是否重置直流矫正数值,返回要添加的数值
|
||
std::string FlagCkeck(MyFStream& file, int byteInfo);
|
||
uint16_t ReadByte(MyFStream& file, int len);
|
||
uint16_t findHuffmanCodeByBit(MyFStream& file, int& length, int& pos, std::string& deque, int curValue, int& curValLen);
|
||
};
|
||
|
||
/// <summary>
/// Blank-page detection.
/// </summary>
class GIMGPROC_LIBRARY_API CImageApplyDiscardBlank : public CImageApply
{
public:
    /// <summary>
    /// File format / color mode combinations accepted by the file-size based check.
    /// </summary>
    enum FileType
    {
        JPEG_COLOR,               // 0: JPG, color
        JPEG_GRAY,                // 1: JPG, grayscale
        PNG_COLOR,                // 2: PNG, color
        PNG_GRAY,                 // 3: PNG, grayscale
        PNG_BINARAY,              // 4: PNG, binary image (original spelling, kept for existing callers)
        PNG_BINARY = PNG_BINARAY  // Correctly spelled alias with the same value
    };

    /// <summary>
    /// Blank-page detection.
    /// </summary>
    /// <param name="threshold">Contour threshold. Range [0, 255]</param>
    /// <param name="edge">Edge inset. Range [0, +∞]</param>
    /// <param name="devTh">Handwriting decision threshold. The lower the value, the more readily handwriting is deemed present. Range [0, +∞]</param>
    /// <param name="meanTh">Document background threshold. A background below this value is treated as a non-blank page outright. Range [0, 255]</param>
    /// <param name="dilate">Paper-speck suppression. Has no effect when ≤1; the larger the value, the more readily specks are ignored. Range [1, +∞]</param>
    CImageApplyDiscardBlank(double threshold = 40, int edge = 50, double devTh = 30, double meanTh = 200, int dilate = 11);

    virtual ~CImageApplyDiscardBlank(void);

    /// <summary>
    /// Processes a single image. Defined out of line.
    /// NOTE(review): how a blank result is reported through pDib (presumably by releasing the image) is not visible in this header — confirm.
    /// </summary>
    virtual void apply(cv::Mat& pDib, int side);

    /// <summary>
    /// Processes a list of images. Defined out of line.
    /// </summary>
    virtual void apply(std::vector<cv::Mat>& mats, bool isTwoSide);

    /// <summary>
    /// Blank-page detection based on the image content.
    /// Note that the default for dilate here (3) differs from the constructor default (11).
    /// </summary>
    /// <param name="pDib">Source image</param>
    /// <param name="threshold">Contour threshold</param>
    /// <param name="edge">Edge inset</param>
    /// <param name="devTh">Handwriting decision threshold. The lower the value, the more readily handwriting is deemed present.</param>
    /// <param name="meanTh">Document background threshold. A background below this value is treated as a non-blank page outright.</param>
    /// <param name="dilate">Paper-speck suppression. Has no effect when ≤1; the larger the value, the more readily specks are ignored</param>
    /// <returns>true for a blank page, false for a non-blank page</returns>
    static bool apply(const cv::Mat& pDib, double threshold = 40, int edge = 50, double devTh = 30, double meanTh = 200, int dilate = 3);

    /// <summary>
    /// Blank-page detection based on the size of the encoded file.
    /// </summary>
    /// <param name="fileSize">JPG file size</param>
    /// <param name="imageSize">Image size</param>
    /// <param name="type">0 = JPG + color, 1 = JPG + grayscale, 2 = PNG + color, 3 = PNG + grayscale, 4 = PNG + binary image</param>
    /// <param name="threshold">Detection sensitivity threshold</param>
    /// <param name="data">Pointer to the start of the file data</param>
    /// <returns>true for a blank page, false for a non-blank page</returns>
    static bool apply(int fileSize, const cv::Size& imageSize, FileType type, double threshold, const char* data = nullptr);

private:
    double m_threshold;
    int m_edge;
    double m_devTh;
    double m_meanTh;
    int m_dilate;
};

#endif // !IMAGE_APPLY_DISCARD_BLANK_H