更新跳过空白页算法,去掉基于文件大小跳过空白页算法,优化效能

This commit is contained in:
yangjiaxuan 2023-12-05 09:02:43 +08:00
parent b7b85e44a8
commit 7ab95e272e
2 changed files with 23 additions and 1163 deletions

View File

@ -1,838 +1,6 @@
#include "ImageApplyDiscardBlank.h"
#include "ImageProcess_Public.h"
#include <algorithm>
#include <cmath>
#include <exception>
#include <stdint.h>
#include <bitset>
#include <stdlib.h>
#include <utility>
#include <cstring>
#include <vector>
#include <iomanip>
//#define _DEBUG_
#define BIGENDIAN 4321
#define LILENDIAN 1234
#if defined(__linux__)
# include <endian.h>
# define ENDIANNESS __BYTE_ORDER
#else
# if defined(__amd64__) || defined(_M_X64) || defined(__i386) || \
defined(_M_I86) || defined(_M_IX86) || defined(__X86__) || \
defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) || \
defined(__INTEL__) || defined(__386)
# define ENDIANNESS LILENDIAN
# else
# define ENDIANNESS BIGENDIAN
# endif
#endif
/**
 * Returns the 16-bit value stored at p with its two bytes swapped.
 *
 * @param p Pointer to at least two readable bytes. The value is copied out
 *          with memcpy, so p does not have to be aligned for uint16_t.
 * @return The byte-swapped value.
 */
inline uint16_t flip16(void* p) {
    uint16_t z;
    std::memcpy(&z, p, sizeof(z));
    // Both halves move by exactly one byte. The former right shift of 9 bits
    // lost the lowest bit of the high byte.
    return static_cast<uint16_t>((z >> 8) | (z << 8));
}
/* Return the 32-bit value stored at p with the order of its four bytes reversed. */
inline uint32_t flip32(void* p) {
    const uint32_t value = *(uint32_t*)(p);
    // Pull the four bytes apart, lowest first ...
    const uint32_t byte0 = value & 0xFF;
    const uint32_t byte1 = (value >> 8) & 0xFF;
    const uint32_t byte2 = (value >> 16) & 0xFF;
    const uint32_t byte3 = (value >> 24) & 0xFF;
    // ... and reassemble them in the opposite order.
    return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
/**
 * Returns the 64-bit value stored at p with the order of its eight bytes reversed.
 *
 * All arithmetic is done on uint64_t. The previous masks were built from
 * 0xFFUL, and shifting an unsigned long by 32..56 bits is undefined where
 * unsigned long is only 32 bits wide.
 *
 * @param p Pointer to at least eight readable bytes; no alignment is required.
 * @return The byte-swapped value.
 */
inline uint64_t flip64(void* p) {
    uint64_t z;
    std::memcpy(&z, p, sizeof(z));
    uint64_t out = 0;
    // Peel bytes off the low end of z and push them onto the low end of out,
    // which reverses their order.
    for (int i = 0; i < 8; ++i) {
        out = (out << 8) | (z & 0xFF);
        z >>= 8;
    }
    return out;
}
#if ENDIANNESS == BIGENDIAN
# define lil16(p) flip16(p)
# define lil32(p) flip32(p)
# define lil64(p) flip64(p)
# define big16(p) *(uint16_t*)(p)
# define big32(p) *(uint32_t*)(p)
# define big64(p) *(uint64_t*)(p)
#else
# define lil16(p) *(uint16_t*)(p)
# define lil32(p) *(uint32_t*)(p)
# define lil64(p) *(uint64_t*)(p)
# define big16(p) flip16(p)
# define big32(p) flip32(p)
# define big64(p) flip64(p)
#endif
// png: helper whose read() inspects an in-memory PNG file.
// Default constructor; the class has no data members to initialise.
png::png()
{
}
// Destructor; there is nothing to release.
png::~png() {
// intentionally empty
}
bool png::read(const char* data, int length, double threshold)
{
MyFStream file(data, length);
// magic png header
uchar b_hdr[8];
file.read((char*)b_hdr, 8);
if (std::memcmp("\x89\x50\x4e\x47\x0d\x0a\x1a\x0a", b_hdr, 8) != 0)
return false;
uchar IHDR = 0;
for (size_t i = 0; i < 4; i++)
IHDR = file.get();
char type[4] = { 0 };
file.read((char*)type, sizeof(type));
//file.read((char*)b_hdr, 8);
int width = 0, height = 0;
file.read_reverse((char*)&width, 4);
file.read_reverse((char*)&height, 4);
uchar depth = file.get();
uchar colorType = file.get();
uchar compressionMethod = file.get();
uchar filterMethod = file.get();
uchar interlaceMethod = file.get();
// read chunks
// assuming none are incomplete
std::string str_IDAT = "";
while (!file.eof() && str_IDAT != "IDAT")
{
char str = file.get();
if (str == 'I' && str_IDAT.empty())
str_IDAT = "I";
else if (str == 'D' && str_IDAT == "I")
str_IDAT = "ID";
else if (str == 'A' && str_IDAT == "ID")
str_IDAT = "IDA";
else if (str == 'T' && str_IDAT == "IDA")
str_IDAT = "IDAT";
else
str_IDAT = "";
}
if (str_IDAT != "IDAT")
return false;
int start_pos = file.tellg();
char CRC[4] = { 0 };
while (!file.eof())
{
char str = file.get();
if (str == 0x49)
{
file.move(-1);
file.read(CRC, 4);
if (std::memcmp("\x49\x45\x4e\x44", CRC, 4) == 0)
break;
}
}
if (std::memcmp("\x49\x45\x4e\x44", CRC, 4) != 0)
return false;
int end_pos = file.tellg();
double imageSize = width * height;
double fileSize = end_pos - start_pos;
if (fileSize / imageSize > threshold)
return false;
else
return true;
}
#define M_PI 3.14159265358979
// Clamps a colour component to [0, 255]. Values inside the range are rounded
// to the nearest integer (truncation would be acceptable as well).
int RGBValueLimit(double input) {
    const bool belowRange = input < 0;
    const bool aboveRange = input > 255;
    return belowRange ? 0 : (aboveRange ? 255 : static_cast<int>(round(input)));
}
// Writes a ROW x COL matrix to std::cout: a blank line, one line per row with
// every value followed by a space, then another blank line.
void print(double** originMatrix) {
    std::cout << std::endl;
    for (int r = 0; r < ROW; ++r) {
        const double* line = originMatrix[r];
        for (int c = 0; c < COL; ++c)
            std::cout << line[c] << " ";
        std::cout << std::endl;
    }
    std::cout << std::endl;
}
// Spreads 64 values given in zig-zag order over a freshly allocated
// ROW x COL matrix. The matrix is created with new[] (one array per row plus
// the row table) and handed to the caller, who has to release it.
double** UnZigZag(int* originArray) {
    double** grid = new double* [ROW];
    for (int r = 0; r < ROW; ++r)
        grid[r] = new double[COL];

    int idx = 0;
    int col = 0;
    int row = 0;
    bool upRight = true; // true: moving up and to the right, false: down and to the left
    while (idx < 64) {
        grid[row][col] = originArray[idx];
        ++idx;

        // Take one diagonal step in the current direction.
        if (upRight) {
            ++col;
            --row;
        }
        else {
            --col;
            ++row;
        }

        // Leaving the grid on any side reverses the direction.
        const bool outside = col < 0 || row < 0 || col > 7 || row > 7;
        if (outside)
            upRight = !upRight;

        // Bottom-left corner: continue along the bottom edge.
        if (col < 0 && row > 7) {
            col = 1;
            row = 7;
        }

        // Pull the position back onto the grid.
        if (col < 0) {
            col = 0;
        }
        else if (col > 7) {
            col = 7;
            row += 2;
        }
        if (row < 0) {
            row = 0;
        }
        else if (row > 7) {
            row = 7;
            col += 2;
        }
    }
    return grid;
}
// Reads the component list of a scan header: one count byte, then for every
// component its id and a byte whose high nibble is the DC table id and whose
// low nibble is the AC table id. Each mapping is inserted into
// componentHuffmanMap (an id that is already present keeps its old entry).
// `len` is not used to bound the reads. Returns false only if an exception
// is thrown while reading or storing.
bool JPEGScan::Init(MyFStream& file, uint16_t len) {
    (void)len;
    try {
        for (uint8_t remaining = file.get(); remaining != 0; --remaining) {
            const uint8_t componentId = file.get();
            const uint8_t tableByte = file.get();
            const uint8_t dcId = tableByte >> 4;
            const uint8_t acId = tableByte & 0x0f;
            componentHuffmanMap.insert(std::make_pair(componentId, std::make_pair(dcId, acId)));
        }
        return true;
    }
    catch (...) {
        return false;
    }
}
/**
 * Builds the code table from a Huffman table payload.
 *
 * The first 16 bytes give the number of codes of length 1..16; the bytes
 * after that are the values for those codes in order. Codes are assigned
 * incrementally: the running code is shifted left once per length and
 * incremented after every assignment.
 *
 * @param file Stream positioned at the first count byte.
 * @param len  Number of payload bytes to consume (counts plus values).
 * @return true on success; false when the payload is shorter than the 16
 *         count bytes, when the counts announce more values than the payload
 *         holds, or when an exception is thrown.
 */
bool JPEGHuffmanCode::Init(MyFStream& file, uint16_t len) {
    try {
        std::vector<uint8_t> payload;
        payload.reserve(len);
        for (uint16_t i = 0; i < len; ++i)
            payload.push_back(file.get());

        // The 16 count bytes are indexed unconditionally below.
        if (payload.size() < 16)
            return false;

        size_t symbolPos = 16;
        int curCode = 0;
        for (int i = 0; i < 16; i++) {
            int count = payload[i];
            curCode <<= 1;
            while (count--) {
                // The counts promise more values than were supplied.
                if (symbolPos >= payload.size())
                    return false;
                const uint8_t bit = i + 1;
                const uint8_t weight = payload[symbolPos];
                table.insert(std::make_pair(static_cast<uint16_t>(curCode), std::make_pair(bit, weight)));
                curCode++;
                symbolPos++;
            }
        }
    }
    catch (...) {
        return false;
    }
    return true;
}
// Looks `code` up in the table and stores the lookup result in `it`.
// Note the polarity: the return value is true when NO entry with this code
// and this bit length exists, and false when a matching entry was found.
bool JPEGHuffmanCode::findKey(const uint16_t& code, const uint8_t& bit, iterator& it)
{
    it = table.find(code);
    const bool matched = (it != table.end()) && (it->second.first == bit);
    return !matched;
}
/**
 * Reads a quantization table payload: one byte holding precision (high
 * nibble) and table id (low nibble), followed by len - 1 table bytes that
 * are appended to `table`.
 *
 * @param file Stream positioned at the precision/id byte.
 * @param len  Number of payload bytes to consume, including that first byte.
 * @return true on success; false when len is 0 or an exception is thrown.
 */
bool JPEGQuality::Init(MyFStream& file, uint16_t len) {
    // Without this check the unsigned counter would wrap around and the loop
    // below would run 65535 times.
    if (len == 0)
        return false;
    try {
        const int info = file.get();
        precision = info >> 4;
        id = info & 0x0f;

        const uint16_t entries = len - 1;
        table.reserve(table.size() + entries);
        for (uint16_t i = 0; i < entries; ++i)
            table.push_back(file.get());
    }
    catch (...) {
        return false;
    }
    return true;
}
// Reads the three bytes describing one frame component: its id, a byte
// holding the horizontal (high nibble) and vertical (low nibble) sampling
// factor, and the id of its quantization table. `len` is not used.
// Returns false only if an exception is thrown.
bool JPEGComponent::Init(MyFStream& file, uint16_t len) {
    (void)len;
    try {
        const int idByte = file.get();
        const int samplingByte = file.get();
        const int tableByte = file.get();

        colorId = idByte;
        h_samp_factor = samplingByte >> 4;
        v_samp_factor = samplingByte & 0x0f;
        qualityId = tableByte;
        return true;
    }
    catch (...) {
        return false;
    }
}
bool JPEGData::readJPEG(const char* data, int length)
{
m_res = -1;
//std::fstream file(filePath, std::ios::in | std::ios::binary);
MyFStream file(data, length);
if (file.fail())
return false;
file.seekg(0, MyFStream::End);
pos = file.tellg();
file.seekg(2, MyFStream::Begin);
dc_huffman.resize(2);
ac_huffman.resize(2);
try
{
//do read data through using other method
uint16_t pLen = 0;
uint16_t pMarker = 0xFF;
uint16_t pType = 0x00;
while (!file.eof())
{
pMarker = file.get();
pType = file.get();
if (pType == EOI)
break;
pLen = file.get();
pLen = (pLen << 8) + file.get();
// cout<<hex<<pMarker<<" "<<pType<<" "<<pLen<<endl;
if (pMarker != 0xFF)
throw std::exception();
bool flag = true;
switch (pType)
{
case SOF0:
case SOF1:
case SOF2:
{
flag = readSOF(file, pLen - 2);
break;
}
case DHT:
{
JPEGHuffmanCode huf;
int info = file.get();
int tableId = info & 0x0f;
// cout<<hex<<info<<" ";
flag = huf.Init(file, pLen - 3);
if ((info >> 4) & 1) ac_huffman[tableId] = huf;
else dc_huffman[tableId] = huf;
break;
}
//case SOI:
//case EOI:
case SOS:
{
flag = scan.Init(file, pLen - 2);
int count = 3;
// cout<<endl;
while (count--) file.get();
// cout<<endl;
//正式读取数据
if (!flag) break;
flag = readData(file);
break;
}
case DQT:
{
JPEGQuality q;
flag = q.Init(file, pLen - 2);
quality.push_back(q);
break;
}
case DRI:
{
resetInterval = ReadByte(file, 2);
break;
}
case APP0:
case APP1:
case APP2:
case COM:
{
pLen -= 2;
while (pLen--)
{
file.get();
}
break;
}
default:
pLen -= 2;
while (pLen--)
{
file.get();
}
break;
}
if (!flag) throw std::exception();
// cout<<endl;
}
}
catch (...)
{
return false;
}
return true;
}
/**
 * Reads a frame header: sample precision, height, width, component count and
 * one JPEGComponent per component. Width and height only ever grow (the
 * larger of the stored and the newly read value is kept), and the maximum
 * sampling factors are updated.
 *
 * @param file Stream positioned at the precision byte.
 * @param len  Payload length of the segment.
 * @return true on success; false when the component count is not 3, when the
 *         second component has a zero sampling factor, or when an exception
 *         is thrown.
 */
bool JPEGData::readSOF(MyFStream& file, uint16_t len)
{
    try {
        precision = file.get();
        height = std::max(height, (int)ReadByte(file, 2));
        width = std::max(width, (int)ReadByte(file, 2));
        const int count = ReadByte(file, 1);
        // Only three-component images are supported.
        if (count != 3) return false;
        len -= 6;
        component.resize(count);
        for (int i = 0; i < count; i++)
        {
            JPEGComponent com;
            com.Init(file, len / 3);
            max_h_samp_factor = std::max(max_h_samp_factor, (int)com.h_samp_factor);
            max_v_samp_factor = std::max(max_v_samp_factor, (int)com.v_samp_factor);
            component[i] = com;
        }

        // Classify the subsampling by the ratio of blocks per MCU between
        // the first and the second component.
        const int firstBlocks = component[0].h_samp_factor * component[0].v_samp_factor;
        const int secondBlocks = component[1].h_samp_factor * component[1].v_samp_factor;
        // A zero sampling factor would make the integer division below
        // fault, and try/catch cannot intercept that.
        if (secondBlocks == 0)
            return false;
        const int ratio = firstBlocks / secondBlocks;
        if (ratio == 4)
            isYUV411 = true;
        else if (ratio == 2)
            isYUV422 = true;
        else if (ratio == 1)
            isYUV111 = true;
    }
    catch (...) {
        return false;
    }
    return true;
}
// Decodes the entropy-coded scan data. Only the Huffman stage is active: it
// expands the run-length codes into 64-entry blocks. The later stages of a
// full decoder (dequantisation, inverse zig-zag ordering, inverse DCT and
// YCbCr to RGB conversion) are not run from here.
// Returns the result of huffmanDecode(), or false if anything throws.
bool JPEGData::readData(MyFStream& file)
{
    try
    {
        return huffmanDecode(file);
    }
    catch (...)
    {
        return false;
    }
}
/**
 * Huffman-decodes the scan data MCU by MCU and derives m_res from it.
 *
 * For every 8x8 block the DC coefficient and the run-length coded AC
 * coefficients are decoded and multiplied by the matching quantization table
 * entry. A block is counted as "marked" when its final coefficient index
 * exceeds m_threshold1 and its first coefficient is below m_threshold2.
 * m_res becomes marked blocks / all blocks.
 *
 * @param file Stream positioned at the first byte of entropy-coded data.
 * @return true when decoding reached the end of the data, or stopped because
 *         no Huffman code of up to 16 bits matched (m_res is not updated in
 *         that second case). false when the headers read so far are unusable
 *         (fewer than three components, no or too short quantization table,
 *         table id out of range, no blocks per MCU), when not a single block
 *         was decoded, or when an exception was thrown.
 */
bool JPEGData::huffmanDecode(MyFStream& file)
{
    try
    {
        // The loops below index three components and at least one
        // quantization table unconditionally.
        if (component.size() < 3 || quality.empty())
            return false;

        // Blocks per MCU for each component, in the order Y, Cb, Cr.
        const int YUV[] = { component[0].h_samp_factor * component[0].v_samp_factor,
                            component[1].h_samp_factor * component[1].v_samp_factor,
                            component[2].h_samp_factor * component[2].v_samp_factor };
        // With no blocks per MCU the outer loop would never consume input
        // and therefore never terminate.
        if (YUV[0] + YUV[1] + YUV[2] == 0)
            return false;

        int curValueLength = 0;         // number of bits collected in curValue
        int curValue = 0;               // bits collected so far
        int curBitDequeLength = 8;      // length of curBitDeque as tracked by the bit reader
        int curBitPos = 0;              // next bit to take from curBitDeque
        const int restart = resetInterval; // restart interval, 0 when unused
        // Bit buffer kept as a string of '0'/'1', primed with the first byte.
        std::string curBitDeque = std::bitset<8>(file.get()).to_string();
        int count_1 = 0, count_2 = 0;   // marked blocks / all blocks

        // One iteration decodes one MCU.
        while (!EOI || (pos - file.tellg()) != 2)
        {
            for (int i = 0; i < 3; i++)
            {
                const std::pair<uint8_t, uint8_t>& tableIds = scan.componentHuffmanMap[component[i].colorId];
                const uint8_t dcID = tableIds.first;
                const uint8_t acID = tableIds.second;
                if (dcID >= dc_huffman.size() || acID >= ac_huffman.size())
                    return false;

                size_t qualityId = component[i].qualityId;
                if (qualityId >= quality.size()) qualityId = 0;
                const std::vector<uint16_t>& quantTable = quality[qualityId].table;
                // Every coefficient index 0..63 must have a table entry.
                if (quantTable.size() < 64)
                    return false;

                for (int j = 0; j < YUV[i]; j++)
                {
                    int matrix[64] = { 0 };
                    int valCount = 0;
                    while (valCount < 64)
                    {
                        // Collect bits until they form a code of the DC table
                        // (first coefficient) or the AC table (all others).
                        JPEGHuffmanCode::iterator it;
                        JPEGHuffmanCode& huffman = valCount == 0 ? dc_huffman[dcID] : ac_huffman[acID];
                        while (curValueLength <= 16 && huffman.findKey(curValue, curValueLength, it))
                        {
                            curValue = findHuffmanCodeByBit(file, curBitDequeLength, curBitPos, curBitDeque, curValue, curValueLength);
                        }
                        // NOTE(review): this reports success although m_res
                        // has not been written - confirm that is intended.
                        if (curValueLength > 16)
                            return true;

                        // The table entry gives the bit width of the value and,
                        // for AC codes, the number of zeros preceding it.
                        uint8_t weight, zeroCount = 0;
                        if (valCount == 0)
                            weight = it->second.second;
                        else
                        {
                            weight = it->second.second & 0x0f;
                            zeroCount = it->second.second >> 4;
                        }
                        curValue = 0;
                        curValueLength = 0;
                        if (valCount != 0 && weight == 0 && zeroCount == 0)
                            break; // the rest of the block is zero

                        // Read the value itself.
                        for (int k = 0; k < weight; k++)
                        {
                            curValue = findHuffmanCodeByBit(file, curBitDequeLength, curBitPos, curBitDeque, curValue, curValueLength);
                        }
                        // Values whose top bit is clear stand for negative numbers.
                        curValue = (curValue >= pow(2, curValueLength - 1) ? curValue : curValue - pow(2, curValueLength) + 1);

                        // DC values are differences to the previous block of the component.
                        int writeValue = valCount == 0 ? (preDCValue[i] += curValue) : curValue;
                        valCount += zeroCount;
                        // A zero run may point behind the last coefficient in
                        // damaged data; stop before writing outside the block.
                        if (valCount >= 64)
                            break;
                        writeValue *= quantTable[valCount]; // dequantise
                        matrix[valCount] = writeValue;
                        curValue = 0;
                        curValueLength = 0;
                        valCount++;
                    }
                    if (valCount > m_threshold1 && matrix[0] < m_threshold2)
                        count_1++;
                    count_2++;
                }
            }

            // Restart interval handling.
            if (restart > 0)
            {
                resetInterval--;
                if (resetInterval == 0)
                {
                    resetInterval = restart;
                    curDRI += 1;
                    curDRI &= 0x7;
                    // Consume the two marker bytes and check for end of image.
                    file.get();
                    if (file.get() == 0xD9)
                        EOI = true;
                    // Drop the bits still buffered and reset the DC predictors.
                    curBitPos = curBitDequeLength;
                    preDCValue[0] = 0;
                    preDCValue[1] = 0;
                    preDCValue[2] = 0;
                }
            }

            if (pos - file.tellg() == 2) break;
            // Once the stream is exhausted get() only returns a filler byte,
            // so going on would decode bits that are not in the file.
            if (file.eof()) break;
        }

        if (count_2 == 0)
            return false;
        m_res = static_cast<double>(count_1) / static_cast<double>(count_2);
    }
    catch (const std::exception& ex)
    {
        std::cout << ex.what();
        return false;
    }
    return true;
}
// Converts the blocks of one MCU stored in `ycbcr` into an RGB pixel grid of
// (ROW * max_v_samp_factor) rows by (COL * max_h_samp_factor) columns.
// YUV holds the number of blocks per MCU for Y, Cb and Cr. The grid is
// allocated with new[] and handed to the caller.
RGB** JPEGData::YCbCrToRGB(const int* YUV)
{
    const int outRows = ROW * max_v_samp_factor;
    const int outCols = COL * max_h_samp_factor;

    // Scale factors that map an output coordinate onto the chroma blocks.
    const double cbScaleH = component[1].h_samp_factor * 1.0 / max_h_samp_factor;
    const double cbScaleV = component[1].v_samp_factor * 1.0 / max_v_samp_factor;
    const double crScaleH = component[2].h_samp_factor * 1.0 / max_h_samp_factor;
    const double crScaleV = component[2].v_samp_factor * 1.0 / max_v_samp_factor;

    RGB** pixels = new RGB * [outRows];
    for (int r = 0; r < outRows; ++r)
        pixels[r] = new RGB[outCols];

    // Which block supplies a sample depends on the sampling factors.
    for (int j = 0; j < outRows; ++j)
    {
        for (int k = 0; k < outCols; ++k)
        {
            const int yPos = (j / ROW) * component[0].h_samp_factor + (k / COL);
            const int cbPos = YUV[0] + (int)((k / ROW) * cbScaleV) + (int)((j / COL) * cbScaleH);
            const int crPos = YUV[0] + YUV[1] + (int)((k / ROW) * crScaleV) + (int)((j / COL) * crScaleH);

            const double y = ycbcr[yPos][j % ROW][k % COL];
            const double cb = ycbcr[cbPos][(int)(j * cbScaleV)][(int)(k * cbScaleH)];
            const double cr = ycbcr[crPos][(int)(j * crScaleV)][(int)(k * crScaleH)];

            RGB& px = pixels[j][k];
            px.red = RGBValueLimit(128 + y + 1.402 * cr);
            px.green = RGBValueLimit(128 + y - 0.71414 * cr - 0.34414 * cb);
            px.blue = RGBValueLimit(128 + y + 1.772 * cb);
        }
    }
    return pixels;
}
// Allocates and fills a row x row transform matrix with the entries
//   a[i][j] = s(i) * cos(pi * (j + 0.5) * i / row),
// where s(0) = sqrt(1 / row) and s(i) = sqrt(2 / row) otherwise.
// The matrix is created with new[] and handed to the caller.
double** JPEGData::createDCTAndIDCTArray(int row)
{
    double** basis = new double* [row];
    for (int r = 0; r < row; ++r)
        basis[r] = new double[row];

    for (int i = 0; i < row; ++i)
    {
        // The scale factor depends on the row index only.
        const double scale = (i == 0) ? sqrt(1.0 / row) : sqrt(2.0 / row);
        double* line = basis[i];
        for (int j = 0; j < row; ++j)
            line[j] = scale * cos(M_PI * (j + 0.5) * i / row);
    }
    return basis;
}
// Forward transform in place: Y = A * X * A', with A = DCTAndIDCTArray.
// The input matrix is printed to std::cout first.
void JPEGData::DCT(double** originMatrix)
{
    print(originMatrix);

    // First product: left = A * X.
    std::vector<std::vector<double>> left(ROW, std::vector<double>(COL, 0));
    for (int r = 0; r < ROW; ++r)
    {
        const double* basisRow = DCTAndIDCTArray[r];
        for (int c = 0; c < COL; ++c)
        {
            double acc = 0;
            for (int k = 0; k < COL; ++k)
                acc += basisRow[k] * originMatrix[k][c];
            left[r][c] = acc;
        }
    }

    // Second product: X = left * A'.
    for (int r = 0; r < ROW; ++r)
    {
        const std::vector<double>& leftRow = left[r];
        for (int c = 0; c < COL; ++c)
        {
            const double* basisRow = DCTAndIDCTArray[c];
            double acc = 0;
            for (int k = 0; k < COL; ++k)
                acc += leftRow[k] * basisRow[k];
            originMatrix[r][c] = acc;
        }
    }
}
// Inverse transform in place: X = A' * Y * A, with A = DCTAndIDCTArray.
void JPEGData::IDCT(double** originMatrix)
{
    // First product: left = A' * Y.
    std::vector<std::vector<double>> left(ROW, std::vector<double>(COL, 0));
    for (int r = 0; r < ROW; ++r)
    {
        for (int c = 0; c < COL; ++c)
        {
            double acc = 0;
            for (int k = 0; k < COL; ++k)
                acc += DCTAndIDCTArray[k][r] * originMatrix[k][c];
            left[r][c] = acc;
        }
    }

    // Second product: Y = left * A.
    for (int r = 0; r < ROW; ++r)
    {
        const std::vector<double>& leftRow = left[r];
        for (int c = 0; c < COL; ++c)
        {
            double acc = 0;
            for (int k = 0; k < COL; ++k)
                acc += leftRow[k] * DCTAndIDCTArray[k][c];
            originMatrix[r][c] = acc;
        }
    }
}
// Multiplies every matrix element by the entry with the same row-major index
// of the quantization table selected by qualityID.
void JPEGData::deQuality(double** originMatrix, int qualityID)
{
    for (int r = 0; r < ROW; ++r)
    {
        double* line = originMatrix[r];
        for (int c = 0; c < COL; ++c)
            line[c] *= quality[qualityID].table[r * ROW + c];
    }
}
// Negates every element of the odd-numbered rows (1, 3, 5, ...).
void JPEGData::PAndNCorrect(double** originMatrix)
{
    for (int r = 1; r < ROW; r += 2)
    {
        double* line = originMatrix[r];
        for (int c = 0; c < COL; ++c)
            line[c] = -line[c];
    }
}
// Turns a byte of scan data into its bit string, looking at the following
// byte when the byte is 0xFF:
//   - any other byte: its eight bits;
//   - 0xFF 0xD9:      sets EOI and returns the literal "false";
//   - 0xFF 0x00:      the eight bits of 0xFF (the 0x00 is dropped);
//   - 0xFF + other:   the bits of 0xFF followed by the bits of that byte.
std::string JPEGData::FlagCkeck(MyFStream& file, int byteInfo)
{
    if (byteInfo != 0xff)
        return std::bitset<8>(byteInfo).to_string();

    const uint8_t next = file.get();
    if (next == 0xD9)
    {
        EOI = true;
        return "false";
    }

    const std::string ffBits = std::bitset<8>(0xFF).to_string();
    if (next == 0x00)
        return ffBits;
    return ffBits + std::bitset<8>(next).to_string();
}
// Reads one byte when len is 1; for any other len reads two bytes and
// combines them with the first byte as the high byte.
uint16_t JPEGData::ReadByte(MyFStream& file, int len)
{
    const uint16_t first = file.get();
    if (len == 1)
        return first;
    return (first << 8) + (uint8_t)file.get();
}
// Appends the next bit of the scan data to curValue and returns the result;
// curValLen is incremented. `deque` is the buffer of pending bits as a
// string of '0'/'1', `pos` the index of the next bit and `length` the
// buffer length as tracked by the caller. When the buffer is used up, one
// more byte is read from `file` (through FlagCkeck) and appended.
uint16_t JPEGData::findHuffmanCodeByBit(MyFStream& file, int& length, int& pos, std::string& deque, int curValue, int& curValLen)
{
    // Buffer used up and at its maximum size: drop the consumed bits.
    const bool cacheFull = (pos == length && length >= HUFFMAN_DECODE_DEQUE_CACHE);
    // Buffer empty or used up: extend it.
    const bool needsMore = !cacheFull && (length == 0 || pos >= length);

    if (cacheFull || needsMore)
    {
        if (cacheFull)
        {
            deque = deque.substr(pos);
        }
        else if (length == 0)
        {
            deque = "";
            pos = 0;
        }

        const int info = file.get();
        std::string bits = FlagCkeck(file, info);
        // "false" signals the end-of-image marker; take the next byte instead.
        if (bits == "false")
            bits = std::bitset<8>(file.get()).to_string();
        deque.append(bits);

        if (cacheFull)
        {
            length = deque.length();
            pos = 0;
        }
        else
        {
            length += 8;
        }
    }

    curValue = (curValue << 1) + (uint8_t)(deque.at(pos++) - '0');
    curValLen++;
    return curValue;
}
#define FX 0.5
#define FY 0.5
CImageApplyDiscardBlank::CImageApplyDiscardBlank(double threshold, int edge, double devTh, double meanTh, int dilate)
: m_threshold(threshold)
, m_edge(edge)
@ -879,39 +47,34 @@ bool CImageApplyDiscardBlank::apply(const cv::Mat& pDib, double threshold, int e
if (pDib.empty())
return true;
double resizeScale = 1.0;
while (pDib.cols * resizeScale > 400)
resizeScale /= 2;
cv::Mat img_resize;
cv::resize(pDib, img_resize, cv::Size(), FX, FY);
if (img_resize.channels() == 3)
cv::cvtColor(img_resize, img_resize, cv::COLOR_BGR2GRAY);
if (dilate > 2)
{
cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(1, dilate));
cv::Mat img_temp1;
cv::morphologyEx(img_resize, img_temp1, cv::MORPH_DILATE, element);
element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(dilate, 1));
cv::Mat img_temp2;
cv::morphologyEx(img_resize, img_temp2, cv::MORPH_DILATE, element);
img_resize = img_temp1 & img_temp2;
}
cv::resize(pDib, img_resize, cv::Size(), resizeScale, resizeScale, cv::INTER_LINEAR);
cv::blur(img_resize, img_resize, cv::Size(3, 3));
//cv::imwrite("img_resize.jpg", img_resize);
cv::Mat threshold_img;
cv::threshold(img_resize, threshold_img, threshold, 255, cv::THRESH_BINARY);
if (img_resize.channels() == 3)
{
cv::cvtColor(img_resize, threshold_img, cv::COLOR_BGR2GRAY);
cv::threshold(threshold_img, threshold_img, threshold, 255, cv::THRESH_BINARY);
}
else
cv::threshold(img_resize, threshold_img, threshold, 255, cv::THRESH_BINARY);
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> h1;
hg::findContours(threshold_img, contours, h1, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
return true;
std::vector<cv::Point> contour;
for (const std::vector<cv::Point>& sub : contours)
for (const cv::Point& p : sub)
contour.push_back(p);
cv::RotatedRect rect = hg::getBoundingRect(contour);
rect.size = cv::Size2f(rect.size.width - edge * FX, rect.size.height - edge * FX);
rect.size = cv::Size2f(rect.size.width - edge * resizeScale, rect.size.height - edge * resizeScale);
cv::Point2f box[4];
rect.points(box);
contour.clear();
@ -922,6 +85,7 @@ bool CImageApplyDiscardBlank::apply(const cv::Mat& pDib, double threshold, int e
contours.push_back(contour);
cv::Mat mask = cv::Mat::zeros(img_resize.size(), CV_8UC1);
hg::fillPolys(mask, contours, cv::Scalar::all(255));
//cv::imwrite("mask.jpg", mask);
bool b = true;
if (img_resize.channels() == 3)
@ -936,63 +100,5 @@ bool CImageApplyDiscardBlank::apply(const cv::Mat& pDib, double threshold, int e
}
else
b &= maxMinCompare(img_resize, mask, devTh, meanTh);
return b;
}
/**
 * Blank-page test that works on the encoded file instead of decoded pixels.
 *
 * - JPEG_COLOR:  the file is Huffman-decoded by JPEGData; on success the
 *                result is m_res < 0.0001. If decoding fails, the
 *                bytes-per-pixel ratio is compared with 0.036.
 * - JPEG_GRAY:   the bytes-per-pixel ratio is compared with 0.018.
 * - PNG_BINARAY: delegated to png::read with a ratio bound of 0.025.
 * - PNG_COLOR / PNG_GRAY: not evaluated, the result is false.
 *
 * NOTE(review): both JPEG ratio checks return true when the file is LARGER
 * than the bound, whereas png::read returns true when the ratio is at or
 * below its bound - confirm which direction is intended.
 *
 * @param fileSize  Size of the encoded file in bytes.
 * @param imageSize Image dimensions in pixels.
 * @param type      Encoding and colour mode of the file.
 * @param threshold Added to 700 to form the upper bound for a block's first
 *                  coefficient in the JPEG_COLOR analysis.
 * @param data      Pointer to the encoded file; may be null, in which case
 *                  the decoders are not run.
 * @return Result of the selected check as described above.
 */
bool CImageApplyDiscardBlank::apply(int fileSize, const cv::Size& imageSize, FileType type, double threshold, const char* data)
{
    // Multiply as double: width * height can overflow int for large scans.
    const double pixelCount = static_cast<double>(imageSize.width) * static_cast<double>(imageSize.height);
    const double bytesPerPixel = static_cast<double>(fileSize) / pixelCount;

    switch (type)
    {
    case JPEG_COLOR:
    {
        // The decoder is only constructed where it is needed; its
        // constructor allocates the transform table.
        JPEGData jpg;
        jpg.m_threshold1 = 3;
        jpg.m_threshold2 = 700 + threshold;
        if (jpg.readJPEG(data, fileSize))
            return jpg.m_res < 0.0001;
        return bytesPerPixel > 0.036;
    }
    case JPEG_GRAY:
        return bytesPerPixel > 0.018;
    case PNG_BINARAY:
    {
        // png::read would copy from the pointer without checking it.
        if (data == nullptr)
            return false;
        png png1;
        return png1.read(data, fileSize, 0.025);
    }
    case PNG_COLOR:
    case PNG_GRAY:
    default:
        break;
    }
    return false;
}
// Wraps an existing byte buffer for sequential reading, starting at offset 0.
// Only the pointer is stored - nothing is copied - so `data` has to stay
// valid for as long as the stream is used.
MyFStream::MyFStream(const char* data, int length)
: m_data(data)
, m_length(length)
, m_pos(0)
{
}
/**
 * Copies the next len bytes to dst and advances the read position.
 *
 * If fewer than len bytes remain, nothing is copied and the position is not
 * changed; callers can detect this by comparing tellg() before and after.
 *
 * @param dst Destination buffer with room for len bytes.
 * @param len Number of bytes to copy; values <= 0 are ignored.
 */
void MyFStream::read(char* dst, int len)
{
    if (len <= 0 || m_pos < 0)
        return;
    // "len <= remaining" instead of "m_pos + len < m_length": a read that
    // ends exactly at the last byte is valid, and the subtraction cannot
    // overflow the way the addition could.
    if (len <= m_length - m_pos)
    {
        memcpy(dst, m_data + m_pos, len);
        m_pos += len;
    }
}
/**
 * Copies the next len bytes to dst in reverse order (the last source byte
 * becomes dst[0]) and advances the read position.
 *
 * If fewer than len bytes remain, nothing is copied and the position is not
 * changed.
 *
 * @param dst Destination buffer with room for len bytes.
 * @param len Number of bytes to copy; values <= 0 are ignored.
 */
void MyFStream::read_reverse(char* dst, int len)
{
    if (len <= 0 || m_pos < 0)
        return;
    // A read that ends exactly at the last byte is valid, hence "<=".
    if (len <= m_length - m_pos)
    {
        for (int i = 0; i < len; i++)
            dst[i] = m_data[m_pos + len - i - 1];
        m_pos += len;
    }
}

View File

@ -24,7 +24,9 @@
2023/10/20 v1.6.1 JPEG文件大小判断空白页
2023/10/30 v1.7 JPEG文件大小判断空白页的算法接口
2023/11/04 v1.7.1 PNG二值化文件大小判断空白页的选项
* v1.7.1
2023/12/01 v1.8 JPEG/PNG文件大小判断空白页方案
2023/12/04 v1.9
* v1.9
* ====================================================
*/
@ -33,247 +35,10 @@
#define IMAGE_APPLY_DISCARD_BLANK_H
#include "ImageApply.h"
#include <cmath>
#include <stdint.h>
#include <utility>
#include <string>
#include <vector>
#include <iostream>
#include <unordered_map>
#include <map>
/// Minimal read-only stream over a caller-owned byte buffer, offering the
/// few std::fstream-like operations the parsers in this file need.
class MyFStream
{
public:
    /// Reference point for seekg().
    enum Seekdir
    {
        Begin,
        End
    };

public:
    /// Wraps `length` bytes starting at `data`; the buffer is not copied.
    MyFStream(const char* data, int length);

    /// True once the read position has reached or passed the end of the buffer.
    bool eof() const
    {
        return m_pos >= m_length;
    }

    /// True when the stream was created without a buffer or with a non-positive length.
    bool fail() const { return (m_data == nullptr) || (m_length <= 0); }

    /// Copies `len` bytes to `dst` and advances the position.
    void read(char* dst, int len);

    /// Like read(), but stores the bytes in reverse order.
    void read_reverse(char* dst, int len);

    /// Returns the next byte and advances. At the end of the buffer the
    /// position is left unchanged and 0xD9 is returned instead.
    unsigned char get()
    {
        if (m_pos >= m_length)
            return 0xD9;
        unsigned char d = m_data[m_pos];
        m_pos++;
        return d;
    }

    /// Sets the read position relative to the start or the end of the
    /// buffer. A position before the start is clamped to 0, because get()
    /// would otherwise index in front of the buffer.
    void seekg(int offset, Seekdir dir)
    {
        switch (dir)
        {
        case MyFStream::Begin:
            m_pos = offset;
            break;
        case MyFStream::End:
            m_pos = m_length + offset;
            break;
        default:
            break;
        }
        if (m_pos < 0)
            m_pos = 0;
    }

    /// Current read position as an offset from the start of the buffer.
    int tellg() const { return m_pos; }

    /// Moves the read position by `step` bytes (negative values move
    /// backwards); a position before the start is clamped to 0.
    void move(int step)
    {
        m_pos += step;
        if (m_pos < 0)
            m_pos = 0;
    }

private:
    const char* m_data; ///< caller-owned buffer
    int m_length;       ///< number of bytes in m_data
    int m_pos;          ///< offset of the next byte to read
};
// Stateless helper that inspects an in-memory PNG file.
class png
{
public:
png();
~png();
// Examines `length` bytes at `data`. Returns false when the data does not
// start with the PNG signature or the "IDAT"/"IEND" tags are not found;
// otherwise returns true when (bytes between the two tags) / (width * height)
// does not exceed `threshold`.
bool read(const char* data, int length, double threshold = 0.025);
};
// Number of rows and columns of one coefficient block.
#define ROW 8
#define COL 8
#define HUFFMAN_DECODE_DEQUE_CACHE 64// unit: bits
// #define _DEBUG_
// #define _DEBUGOUT_
// Frees every element of a vector of 2-D arrays - the first ROW rows of each
// element, then its row table - and clears the vector afterwards.
#define FREE_VECTOR_LP(vectorName) \
for(auto item : vectorName){ \
for(int i=0;i<ROW;i++)\
delete [] item[i];\
delete [] item; \
}\
vectorName.clear();
// Frees a 2-D array: its first `row` rows, then the row table itself.
#define FREE_LP_2(lpName,row) \
for(int i=0;i<row;i++){\
delete [] lpName[i];\
}\
delete [] lpName;
// Segment types (the second byte of a marker)
enum JPEGPType {
SOF0 = 0xC0, // start of frame
SOF1 = 0xC1, // start of frame
SOF2 = 0xC2, // start of frame
DHT = 0xC4, // Huffman table
SOI = 0xD8, // start of file
EOI = 0xD9, // end of file
SOS = 0xDA, // start of scan
DQT = 0xDB, // quantization table definition
DRI = 0xDD, // restart interval definition
APP0 = 0xE0, // interchange format and image identification information
APP1 = 0xE1, // interchange format and image identification information
APP2 = 0xE2, // interchange format and image identification information
COM = 0xFE // comment
};
// Turns a one-dimensional array of 64 values in zig-zag order into a newly
// allocated two-dimensional ROW x COL array; the caller takes over the result.
double** UnZigZag(int* originArray);
// One pixel with 8-bit red, green and blue components.
struct RGB {
uint8_t red;
uint8_t green;
uint8_t blue;
};
// SOS: component list read from a scan header
class JPEGScan {
public:
// component id -> <DC table id, AC table id>
std::map<uint8_t, std::pair<uint8_t, uint8_t>> componentHuffmanMap;
// Reads the component count and one <id, table ids> entry per component
// from `file` into componentHuffmanMap; `len` does not bound the reads.
bool Init(MyFStream& file, uint16_t len);
};
// APP: application segment information (not referenced elsewhere in the code shown here)
class JPEGInfo {
public:
uint16_t version;
};
// DHT: one Huffman table
class JPEGHuffmanCode {
public:
using iterator = std::map<uint16_t, std::pair<uint8_t, uint8_t>>::iterator;
// code -> <bit length, value byte ("weight")>
std::map<uint16_t, std::pair<uint8_t, uint8_t>> table;
// Builds `table` from `len` payload bytes: 16 per-length counts followed by the values.
bool Init(MyFStream& file, uint16_t len);
// Looks up `code` and stores the lookup result in `it`.
// Returns TRUE when NO entry with this code and bit length exists and
// FALSE when a matching entry was found.
bool findKey(const uint16_t& code, const uint8_t& bit, iterator& it);
};
// DQT
// quantization table
class JPEGQuality {
public:
// High nibble of the first payload byte.
uint8_t precision;
// Low nibble of the first payload byte.
uint8_t id;
// Table entries, one per payload byte after the first, in file order.
std::vector<uint16_t> table;
// Reads `len` payload bytes from `file`: the precision/id byte, then the entries.
bool Init(MyFStream& file, uint16_t len);
};
// SOF segment: description of one colour component
class JPEGComponent {
public:
// 1 = Y, 2 = Cb, 3 = Cr, 4 = I, 5 = Q
uint8_t colorId;
// Horizontal sampling factor (high nibble of the second byte).
uint8_t h_samp_factor;
// Vertical sampling factor (low nibble of the second byte).
uint8_t v_samp_factor;
// Id of the quantization table used by this component.
uint8_t qualityId;
// Reads the three bytes above from `file`; `len` is not used.
bool Init(MyFStream& file, uint16_t len);
};
/// Parser for an in-memory JPEG file. readJPEG() walks the segments and
/// Huffman-decodes the scan data; the outcome of that analysis is published
/// in m_res.
///
/// NOTE(review): the class owns raw arrays but has no user-defined copy
/// operations, so copying an instance would free DCTAndIDCTArray twice.
class JPEGData {
    int max_h_samp_factor; // largest horizontal sampling factor of all components
    int max_v_samp_factor; // largest vertical sampling factor of all components
    int width;
    int height;
    int precision;
    bool isYUV411 = false;
    bool isYUV422 = false;
    bool isYUV111 = false;
    uint8_t curDRI = 0;         // running restart counter, kept modulo 8
    uint16_t resetInterval = 0; // restart interval, counted in MCUs
    int preDCValue[3] = { 0 };  // previous DC value per component (DC values are stored as differences)
    // quantization tables
    std::vector<JPEGQuality> quality;
    // Huffman tables
    std::vector<JPEGHuffmanCode> dc_huffman;
    std::vector<JPEGHuffmanCode> ac_huffman;
    // one entry per colour component
    std::vector<JPEGComponent> component;
    JPEGScan scan;
    std::vector<double**> ycbcr;
    std::vector<RGB**> rgb;
    double** DCTAndIDCTArray;   // ROW x ROW transform matrix, owned by this object
    int pos = 0;                // offset of the end of the file, set by readJPEG()
    bool EOI{ false };          // set once the end-of-image marker was seen in the scan data;
                                // note that this name hides the enumerator ::EOI inside the class

public:
    // Inputs and result of the block analysis in huffmanDecode(). The
    // thresholds have to be set by the caller before readJPEG() is called.
    double m_threshold1 = 0, m_threshold2 = 0, m_res = -1;

public:
    JPEGData() :
        max_h_samp_factor(0),
        max_v_samp_factor(0),
        width(0),
        height(0),
        precision(0) {
        DCTAndIDCTArray = createDCTAndIDCTArray(ROW);
    }

    ~JPEGData() {
        // All ROW rows were allocated in the constructor; freeing only
        // ROW - 1 of them leaked the last row.
        FREE_LP_2(DCTAndIDCTArray, ROW)
        // NOTE(review): FREE_VECTOR_LP releases ROW rows per element, while
        // YCbCrToRGB() allocates ROW * max_v_samp_factor rows - confirm
        // before `rgb` is ever populated.
        FREE_VECTOR_LP(rgb)
    }

    /// Parses `length` bytes at `data`; returns false when parsing fails.
    bool readJPEG(const char* data, int length);
    int getWidth() const { return width; }
    int getHeight() const { return height; }
    std::vector<RGB**> getRGB() const { return rgb; }
    int getMaxHSampFactor() const { return max_h_samp_factor; }
    int getMaxVSampFactor() const { return max_v_samp_factor; }
    /// Allocates and fills a row x row transform matrix; the caller owns it.
    double** createDCTAndIDCTArray(int row);
    /// In-place forward transform of a ROW x COL matrix.
    void DCT(double** originMatrix);
    /// In-place inverse transform of a ROW x COL matrix.
    void IDCT(double** originMatrix);

protected:
    bool readSOF(MyFStream& file, uint16_t len);
    bool readData(MyFStream& file);
    bool huffmanDecode(MyFStream& file);
    void deQuality(double** originMatrix, int qualityID);
    // negates every odd-numbered row
    void PAndNCorrect(double** originMatrix);
    RGB** YCbCrToRGB(const int* YUV);
    // Checks a scan byte for a marker and returns the bit string to append
    // to the bit buffer ("false" at the end-of-image marker).
    std::string FlagCkeck(MyFStream& file, int byteInfo);
    uint16_t ReadByte(MyFStream& file, int len);
    uint16_t findHuffmanCodeByBit(MyFStream& file, int& length, int& pos, std::string& deque, int curValue, int& curValLen);
};
class GIMGPROC_LIBRARY_API CImageApplyDiscardBlank : public CImageApply
{
public:
enum FileType
{
JPEG_COLOR,
JPEG_GRAY,
PNG_COLOR,
PNG_GRAY,
PNG_BINARAY
};
/// <summary>
/// 空白页识别
/// </summary>
@ -282,8 +47,8 @@ public:
/// <param name="edge">边缘缩进。取值范围[0, +∞]</param>
/// <param name="devTh">笔迹判定阈值。该阈值越低,越容易判定存在笔迹。取值范围[0, +∞]</param>
/// <param name="meanTh">文稿底色阈值。低于该阈值的文稿底色,直接视为非空白页。取值范围[0, 255]</param>
/// <param name="dilate">忽略纸张杂点。≤1时不生效值越大越容易忽略杂点。取值范围[1, +∞]</param>
CImageApplyDiscardBlank(double threshold = 40, int edge = 50, double devTh = 30, double meanTh = 200, int dilate = 11);
/// <param name="dilate">忽略纸张杂点。取值3、5、7、9...</param>
CImageApplyDiscardBlank(double threshold = 40, int edge = 50, double devTh = 30, double meanTh = 200, int dilate = 3);
virtual ~CImageApplyDiscardBlank(void);
@ -303,17 +68,6 @@ public:
/// <returns>true为空白页false为非空白页</returns>
static bool apply(const cv::Mat& pDib, double threshold = 40, int edge = 50, double devTh = 30, double meanTh = 200, int dilate = 3);
/// <summary>
///
/// </summary>
/// <param name="fileSize">JPG文件大小</param>
/// <param name="imageSize">图像大小</param>
/// <param name="type">0为JPG + 彩色1为JPG + 灰度2为PNG + 彩色, 3为PNG + 灰度, 4为PNG + 二值图</param>
/// <param name="threshold">识别灵敏度阈值</param>
/// <param name="data">文件数据头指针</param>
/// <returns>true为空白页false为非空白页</returns>
static bool apply(int fileSize, const cv::Size& imageSize, FileType type, double threshold, const char* data = nullptr);
private:
double m_threshold;
int m_edge;