code_app/app/scanner/ocrPdf.cpp

149 lines
4.3 KiB
C++

#include <iostream>
#include "base/HGBase.h"
#ifdef HG_CMP_MSC
#include <Windows.h>
#endif
#include <string>
#include "base/HGUtility.h"
// Looks up the install directory of the Microtek OCR engine in the registry.
//
// Reads the "InstallPath_Fast" string value under
// HKLM\SOFTWARE\[WOW6432Node\]Microtek\Microtek OCR Engine V1.
//
// Returns the stored path, or an empty string when the key/value is missing,
// is not a string value, or when not built with HG_CMP_MSC.
std::string getOcrPath()
{
    std::string ocrPath;
#ifdef HG_CMP_MSC
    HKEY hKey = NULL;
#ifdef _WIN64
    const LONG openRet = RegOpenKeyExA(HKEY_LOCAL_MACHINE, "SOFTWARE\\WOW6432Node\\Microtek\\Microtek OCR Engine V1", 0, KEY_QUERY_VALUE, &hKey);
#else
    const LONG openRet = RegOpenKeyExA(HKEY_LOCAL_MACHINE, "SOFTWARE\\Microtek\\Microtek OCR Engine V1", 0, KEY_QUERY_VALUE, &hKey);
#endif
    if (ERROR_SUCCESS == openRet && NULL != hKey)
    {
        // One extra, always-zero byte: registry strings are not guaranteed to
        // be stored with a terminating NUL, so never let the query fill the
        // whole buffer.
        CHAR szData[MAX_PATH + 1] = { 0 };
        DWORD cbData = MAX_PATH;
        DWORD valueType = 0;
        if (ERROR_SUCCESS == RegQueryValueExA(hKey, "InstallPath_Fast", NULL, &valueType, (LPBYTE)szData, &cbData)
            && (REG_SZ == valueType || REG_EXPAND_SZ == valueType))
        {
            ocrPath = szData;
        }
        RegCloseKey(hKey);
    }
#endif
    return ocrPath;
}
#ifdef HG_CMP_MSC
// Maps the numeric languageType to the tesseract "-l" language code.
// Returns NULL for values outside the supported 0..4 range.
static const char* ocrLanguageName(int languageType)
{
    static const char* const names[] = { "eng", "chi_sim", "chi_tra", "jpn", "kor" };
    const int count = (int)(sizeof(names) / sizeof(names[0]));
    if (languageType < 0 || languageType >= count)
        return NULL;
    return names[languageType];
}

// Appends ocrPath to this process's "Path" environment variable unless the
// variable already contains it, so that tesseract.exe can be found.
static void ensureOcrPathInEnv(const std::string& ocrPath)
{
    std::string pathVar;
    DWORD size = GetEnvironmentVariableA("Path", NULL, 0);
    while (size > 0)
    {
        std::string buf((size_t)size, '\0');
        const DWORD got = GetEnvironmentVariableA("Path", &buf[0], size);
        if (got < size)
        {
            // Success (got excludes the NUL) or the variable vanished (got == 0).
            buf.resize((size_t)got);
            pathVar.swap(buf);
            break;
        }
        // The variable grew between the two calls; retry with the new size.
        size = got;
    }

    if (pathVar.find(ocrPath) == std::string::npos)
    {
        if (!pathVar.empty())
            pathVar += ";";
        pathVar += ocrPath;
        SetEnvironmentVariableA("Path", pathVar.c_str());
    }
}
#endif

// Runs the external tesseract.exe on imgPath to produce a searchable PDF.
//
// imgPath:      path of the input image handed to tesseract.
// dstFile:      path of the PDF to create; any existing file is deleted first.
//               The output base name given to tesseract is derived from it via
//               HGBase_GetFilePrefix.
// languageType: 0 = eng, 1 = chi_sim, 2 = chi_tra, 3 = jpn, 4 = kor.
//
// Returns true only when tesseract was started and exited with code 0.
// Returns false when the OCR engine is not installed, languageType is not
// supported, the process could not be started, or when not built with
// HG_CMP_MSC.
//
// Side effects: modifies the "Path" and "TESSDATA_PREFIX" environment
// variables of the calling process.
bool createOcrPdf(const std::string &imgPath, const std::string &dstFile, int languageType)
{
#ifdef HG_CMP_MSC
    const std::string ocrPath = getOcrPath();
    if (ocrPath.empty())
        return false;

    // Reject unsupported languages before touching the environment or disk.
    const char* language = ocrLanguageName(languageType);
    if (NULL == language)
        return false;

    ensureOcrPathInEnv(ocrPath);

    const std::string tessdataDir = ocrPath + "\\" + "tessdata_f";
    SetEnvironmentVariableA("TESSDATA_PREFIX", tessdataDir.c_str());

    DeleteFileA(dstFile.c_str());

    const int MY_PIPE_BUFFER_SIZE = 1024;

    // Initialize the pipe that captures the child's stdout/stderr.
    HANDLE hPipeRead = NULL;
    HANDLE hPipeWrite = NULL;
    SECURITY_ATTRIBUTES saOutPipe;
    ::ZeroMemory(&saOutPipe, sizeof(saOutPipe));
    saOutPipe.nLength = sizeof(SECURITY_ATTRIBUTES);
    saOutPipe.lpSecurityDescriptor = NULL;
    saOutPipe.bInheritHandle = TRUE;
    if (!CreatePipe(&hPipeRead, &hPipeWrite, &saOutPipe, MY_PIPE_BUFFER_SIZE))
        return false;
    // Only the write end needs to be inherited by the child.
    SetHandleInformation(hPipeRead, HANDLE_FLAG_INHERIT, 0);

    PROCESS_INFORMATION ProcessInfo;
    ::ZeroMemory(&ProcessInfo, sizeof(ProcessInfo));
    STARTUPINFOA StartupInfo;
    ::ZeroMemory(&StartupInfo, sizeof(StartupInfo));
    StartupInfo.cb = sizeof(StartupInfo);
    StartupInfo.dwFlags = STARTF_USESTDHANDLES | STARTF_USESHOWWINDOW;
    StartupInfo.hStdOutput = hPipeWrite;
    StartupInfo.hStdError = hPipeWrite;
    StartupInfo.wShowWindow = SW_HIDE;

    HGChar prefix[256] = { 0 };
    HGBase_GetFilePrefix(dstFile.c_str(), prefix, 256);

    // Built as std::string so arbitrarily long paths cannot overflow a buffer.
    std::string cmd = "tesseract.exe \"";
    cmd += imgPath;
    cmd += "\" \"";
    cmd += std::string(prefix);
    cmd += "\" -l ";
    cmd += language;
    cmd += " --psm 3 --oem 3 pdf";

    bool ret = false;
    // CreateProcessA takes a writable command-line buffer.
    const BOOL started = CreateProcessA(NULL, &cmd[0], NULL, NULL, TRUE, CREATE_NO_WINDOW, NULL, NULL, &StartupInfo, &ProcessInfo);

    // Drop our copy of the write end so ReadFile reports end-of-pipe once the
    // child has exited and closed its inherited copy.
    CloseHandle(hPipeWrite);
    hPipeWrite = NULL;

    if (started)
    {
        // Drain the pipe BEFORE waiting: the pipe buffer is small, and a child
        // blocked on a full pipe would never exit.
        char szPipeOut[MY_PIPE_BUFFER_SIZE];
        DWORD dwStdLen = 0;
        while (ReadFile(hPipeRead, szPipeOut, (DWORD)sizeof(szPipeOut), &dwStdLen, NULL) && dwStdLen > 0)
        {
            // The child's console output is intentionally discarded.
        }

        WaitForSingleObject(ProcessInfo.hProcess, INFINITE);
        DWORD exitCode = (DWORD)-1;
        if (GetExitCodeProcess(ProcessInfo.hProcess, &exitCode) && 0 == exitCode)
            ret = true;

        CloseHandle(ProcessInfo.hProcess);
        CloseHandle(ProcessInfo.hThread);
    }

    CloseHandle(hPipeRead);
    return ret;
#else
    return false;
#endif
}