Adjust and update some functionality
This commit is contained in:
parent
35a9c7b602
commit
898cb00afa
@@ -0,0 +1,34 @@
#pragma once

#include <chrono>

class StopWatch
{
public:
    StopWatch() {
        _start = std::chrono::steady_clock::now();
    }

    void reset() {
        _start = std::chrono::steady_clock::now();
    }

    double elapsed_s() {
        return std::chrono::duration<double>(std::chrono::steady_clock::now() - _start).count();
    }

    double elapsed_ms() {
        return std::chrono::duration<double, std::milli>(std::chrono::steady_clock::now() - _start).count();
    }

    double elapsed_us() {
        return std::chrono::duration<double, std::micro>(std::chrono::steady_clock::now() - _start).count();
    }

    double elapsed_ns() {
        return std::chrono::duration<double, std::nano>(std::chrono::steady_clock::now() - _start).count();
    }

private:
    std::chrono::steady_clock::time_point _start;
};
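A minimal usage sketch for the StopWatch class above (assumes C++11 and <thread>; not part of the commit):

#include <thread>
#include <cstdio>

int main() {
    StopWatch sw;                        // starts timing on construction
    std::this_thread::sleep_for(std::chrono::milliseconds(50));
    std::printf("elapsed: %.3f ms\n", sw.elapsed_ms());  // roughly 50 ms
    sw.reset();                          // restart timing from now
    return 0;
}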
@@ -0,0 +1,12 @@
#ifndef CALLBACKDEFINESH
#define CALLBACKDEFINESH
#include <type_traits>
typedef void(*usbreport_callback)(int conditioncode,void* usrdata);

typedef void(*usbcallback)(int conditioncode,void* usrdata);

//typedef void(*onimagecallback)(void* mat, int bpp, int statuscode);
typedef std::decay<void(void*,int,int)>::type onimagecallback;
typedef std::decay<void(int,void*)>::type usbcallback;

#endif
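Worth noting: std::decay applied to a function type yields the corresponding function-pointer type, so the decayed usbcallback typedef names the same type as the function-pointer typedef above it (redeclaring a typedef with an identical type is legal). A small compile-time check illustrating this (illustrative, not part of the commit):

#include <type_traits>

// Function types decay to pointer-to-function, so the two spellings agree.
static_assert(std::is_same<void(*)(int, void*),
                           std::decay<void(int, void*)>::type>::value,
              "decayed function type equals the explicit function-pointer type");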
@@ -0,0 +1,280 @@
#ifndef COMMON_H
#define COMMON_H
#ifdef __cplusplus
extern "C"{
#endif

typedef struct hg_tag_SIZE
{
    long cx;
    long cy;
}CSSIZE, *PCSSIZE, *LPCSSIZE;

typedef CSSIZE CSSIZEL;
typedef CSSIZE *PCSSIZEL, *LPCSSIZEL;

enum hg_tagUsbSupported
{
    /* stop scanning */
    SCAN_STOP = -1,
    /* error */
    HAVE_ERROR = -2,
    /* normal state */
    NORMAL = 0,
    /* cover open */
    OPEN_COVER = 1,
    /* out of paper */
    NO_FEED = 2,
    /* paper pick-up failed */
    FEED_IN_ERROR = 4,
    /* paper jam */
    PAPER_JAM = 8,
    /* double feed detected */
    DETECT_DOUBLE_FEED = 16,
    /* staple detected */
    DETECT_STAPLE = 32,
    /* paper skew */
    PAPER_SKEW = 64,
    /* automatic mode */
    AUTO_SCAN_MODE = 65,
    /* manual mode */
    MANAUL_SCAN_MODE = 66,
    /* counting mode */
    COUNT_MODE = 67,
    /* hardware error */
    HARDWARE_ERROR = 68,
    /* FPGA crash */
    FPGA_ERROR = 68,
    /* start */
    START_SCAN = 69,
    /* stop */
    STOP_SCAN = 70,
    /* image available */
    HAVE_IMAGE = 71,
    /* update scan parameters */
    UPDATE_SCAN_PARAMETER = 72,
    /* PC busy or error */
    PC_SCAN_BUSY_or_ERROR = 73,
    /* USB link disconnected */
    DEVICE_OFF_LINE = 74
};
typedef enum hg_tagUsbSupported tagUsbSupported;

enum hg_twSS
{
    None = 0,
    A4Letter = 1,
    A4 = 1,
    B5Letter = 2,
    JISB5 = 2,
    B5 = 2,
    USLetter = 3,
    USLegal = 4,
    A5 = 5,
    B4 = 6,
    ISOB4 = 6,
    B6 = 7,
    ISOB6 = 7,
    USLedger = 9,
    USExecutive = 10,
    A3 = 11,
    B3 = 12,
    ISOB3 = 12,
    A6 = 13,
    C4 = 14,
    C5 = 15,
    C6 = 16,
    _4A0 = 17,
    _2A0 = 18,
    A0 = 19,
    A1 = 20,
    A2 = 21,
    A7 = 22,
    A8 = 23,
    A9 = 24,
    A10 = 25,
    ISOB0 = 26,
    ISOB1 = 27,
    ISOB2 = 28,
    ISOB5 = 29,
    ISOB7 = 30,
    ISOB8 = 31,
    ISOB9 = 32,
    ISOB10 = 33,
    JISB0 = 34,
    JISB1 = 35,
    JISB2 = 36,
    JISB3 = 37,
    JISB4 = 38,
    JISB6 = 39,
    JISB7 = 40,
    JISB8 = 41,
    JISB9 = 42,
    JISB10 = 43,
    C0 = 44,
    C1 = 45,
    C2 = 46,
    C3 = 47,
    C7 = 48,
    C8 = 49,
    C9 = 50,
    C10 = 51,
    USStatement = 52,
    BusinessCard = 53,
    MaxSize = 54,
};
typedef enum hg_twSS TwSS;

enum hg_tagFrontBack
{
    FRONT_PAGE = 0,
    BACK_PAGE
};
typedef enum hg_tagFrontBack FRONTBACK;

enum hg_tagFilter
{
    FILTER_RED,
    FILTER_GREEN,
    FILTER_BLUE,
    FILTER_ALL,
    FILTER_NONE,
    ENHANCE_RED,
    ENHANCE_GREEN,
    ENHANCE_BLUE
};
typedef enum hg_tagFilter Filter;

enum hg_tagOrentations
{
    ROTATE_NONE = 0,
    ROTATE_90,
    ROTATE_180,
    ROTATE_270,
    AUTOTEXT_DETECT
};
typedef enum hg_tagOrentations Orentations;

struct hg_tagOutHoleParam
{
    int OutHole;
    int OutHoleValue;/*1~50;*/
};
typedef struct hg_tagOutHoleParam OutHoleParams;

struct hg_tagCropRect
{
    int enable;
    int x;      /* x of the top-left corner of the custom crop area */
    int y;      /* y of the top-left corner of the custom crop area */
    int width;  /* width of the custom crop area */
    int height; /* height of the custom crop area */
};
typedef struct hg_tagCropRect CropRect;

struct hg_tagCustomGamma
{
    int isDefined;
    unsigned char* table;
    int tableLength;
};
typedef struct hg_tagCustomGamma CustomGamma;

enum hg_PaperAlign {
    Rot0 = 0,
    Rot270 = 3,
    AutoTextOrientation = 5
};
typedef enum hg_PaperAlign PaperAlign;

enum hg_Multi_output {
    Unused = -1,
    All,
    ColorGray,
    ColorBw,
    GrayBw
};
typedef enum hg_Multi_output Multi_output;

struct hg_tagImageProcessParams
{
    int PixType;               /*same as color*/
    int DestResulution;        /*same as resolution*/
    int NativeResulution;      /*fixed 200 for now*/
    int AutoDiscardBlank;      /* skip blank pages (generic) */
    int AutoDiscardBlankVince; /* skip blank pages (invoices) */
    int IsDuplex;              /*false:single*/
    int IsFold;                /* fold in half */
    int AutoDescrew;
    int AutoCrop;
    int FillBlackRect;
    int Filter;                /*decolor ,0:red 1:green 2:blue 3:none encolor 5:red 6:green 7:blue*/
    OutHoleParams OutHoleParam;
    int Orentation;            /*0:none 1:90 2:180 3:270 4:auto*/
    int BackRotate180;
    int Brightness;            /*1~255*/
    int Contrast;              /*1~7*/
    float Gamma;               /*0.1f~5.0f*/
    int MultiOutRed;
    int MultiOutputType;       /*-1:none 0:all 1:COLORGRAY 2:COLORBW 3:GRAYBW*/
    CropRect cropRect;
    CustomGamma customGamma;   /* tone curve */
    int RefuseInflow;          /* prevent bleed-through */
    int ColorCorrection;       /* color correction */
    int RemoveMorr;            /* remove moire */
    int ErrorExtention;        /* error diffusion */
    int TextureRemove;         /* descreen */
    int imageSharpen;          /*0:none 1:sharpen 2:sharpen_more 3:blur 4:blur_more*/
    int SplitImage;
    int AnswerSheetFilter;
    int NosieDetach;
    int AutoDetctOrentation;
};
typedef struct hg_tagImageProcessParams ImageProcessParams;


enum hg_color_mode {
    BW,
    Gray,
    Color
};
typedef enum hg_color_mode ColorMode;
struct hg_tagScanParams
{
    int colorMode;        /*2:color 1:gray 0:bw*/
    int papertype;
    PaperAlign paperAlign;
    int Resolution;       /*fixed 200 for now*/
    int UltrasonicDetect; /*double check*/
    int BindingDetect;    /*staple check*/
    int ScrewDetect;
    int ScrewTopLevel;    /*1-5,1 easiest*/
    int ScanCount;        /*1-500*/
    ImageProcessParams ImageProcessParam;
};
typedef struct hg_tagScanParams ScanParam;

struct hg_tagImageInfo
{
    int Width;
    int Height;
    int bpp;
};
typedef struct hg_tagImageInfo ImageInfo;

enum hg_sharpenType
{
    SharpenNone,
    Sharpen,     /* sharpen */
    SharpenMore, /* sharpen more */
    Blur,        /* blur */
    BlurMore     /* blur more */
};
typedef enum hg_sharpenType ImageSharpen;
#ifdef __cplusplus
}
#endif

#endif
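A sketch of how a host application might consume these status codes through the usbreport_callback typedef from the callback header above (the handler name and messages are illustrative, not part of this commit):

#include <cstdio>

// Matches the usbreport_callback signature: void(int, void*).
static void on_usb_report(int conditioncode, void* usrdata)
{
    (void)usrdata;  // unused in this sketch
    switch (conditioncode) {
    case NORMAL:             std::printf("scanner idle\n");         break;
    case OPEN_COVER:         std::printf("cover open\n");           break;
    case PAPER_JAM:          std::printf("paper jam\n");            break;
    case DETECT_DOUBLE_FEED: std::printf("double feed detected\n"); break;
    case HAVE_IMAGE:         std::printf("image ready\n");          break;
    default:                 std::printf("status %d\n", conditioncode); break;
    }
}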
@@ -0,0 +1,155 @@
#ifndef PRE_DEFINE_H
#define PRE_DEFINE_H

#define G100SCANNER

#define EN_LOG
#include <vector>
#ifndef _WIN32
typedef bool BOOL;
typedef unsigned int UINT32;
#endif
typedef unsigned int u32;
typedef struct
{
    u32 gainF[6];
    u32 gainB[6];
    u32 offsetsF[6];
    u32 offsetsB[6];
    u32 expF[3];
    u32 expB[3];
    u32 sp;
}HGCISConfig;


typedef struct
{
    HGCISConfig colorCorrect;
    HGCISConfig color;
    HGCISConfig grayCorrect;
    HGCISConfig gray;
}HGCorrectConfigs;

struct SPSET
{
    unsigned int FSP;
    unsigned int BSP;
};


typedef struct CorrectParam {
    unsigned int Exposures[6];
    unsigned int Gain[12];
    unsigned int Offset[12];
} CorrectParam; /* typedef name added; the original declaration omitted it */

typedef struct CaptureParams
{
    int correctColorExposure[6];
    int correctColorGain[12];
    int correctColorOffset[12];

    int correctGrayExposure[6];
    int correctGrayGain[12];
    int correctGrayOffset[12];

    int colorExposure[6];
    int colorGain[12];
    int colorOffset[12];

    int grayExposure[6];
    int grayGain[12];
    int grayOffset[12];

    int uvCorrectColorExposure[2];
    int uvCorrectGrayExposure[2];
    int uvColorExposure[2];
    int uvGrayExposure[2];
} CaptureParams;


typedef struct hgsize{
    hgsize(){}
    template<typename T1, typename T2>
    hgsize(T1 x,T2 y)
    {
        cy = y;
        cx = x;
    }
    bool operator == (hgsize s)
    {
        if(s.cx == this->cx && s.cy == this->cy)
            return true;
        return false;
    }
    bool isempty()
    {
        return (this->cy*this->cx)?0:1;
    }
    int cy;
    int cx;
}HgSize,HGSIZE;


enum ScannerSerial: unsigned char
{
    G100Serial,
    G200Serial,
    G300Serial,
    G400Serial,
    G10039Serial,
    G20039Serial,
};

struct Vid_pid
{
    Vid_pid(unsigned short set_vid, unsigned short set_pid) :
        vid(set_vid),
        pid(set_pid) {}
    bool operator == (Vid_pid sre)
    {
        if (sre.pid == this->pid && sre.vid == this->vid)
            return true;
        return false;
    }
    unsigned short vid;
    unsigned short pid;
};

///#define LANXUMVERSION
#define HGVERSION


#ifdef EN_LOG
#define LOG printf
#else
#define LOG
#endif

#ifdef HGVERSION
#ifdef G100SCANNER
static std::vector<Vid_pid> DEVICE_ID={
    {0x3072,0x100},
    {0x3072,0x139}
};
#elif defined(G200SCANNER)
static std::vector<Vid_pid> DEVICE_ID={
    {0x3072,0x200},
    {0x3072,0x239}
};
#elif defined(G300SCANNER)
static std::vector<Vid_pid> DEVICE_ID={
    {0x3072,0x300},
};
#else
static std::vector<Vid_pid> DEVICE_ID={
    {0x3072,0x400},
};
#endif
#elif defined(LANXUMVERSION)
static std::vector<Vid_pid> DEVICE_ID={
    {0x31c9,0x8730},
};
#endif
#endif
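A short sketch of how the DEVICE_ID table above might be consulted during USB enumeration (the helper below is hypothetical):

#include <algorithm>

// Returns true if the enumerated vid/pid pair belongs to a supported scanner.
static bool is_supported_device(unsigned short vid, unsigned short pid)
{
    return std::find(DEVICE_ID.begin(), DEVICE_ID.end(), Vid_pid(vid, pid))
           != DEVICE_ID.end();
}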
@@ -0,0 +1,467 @@
#ifndef COMMON_H
#define COMMON_H
#ifdef __cplusplus
#include <stdint.h>

#ifdef __linux__
typedef unsigned char byte;
#endif // __linux__


extern "C"{
#endif

typedef struct hg_tag_SIZE
{
    long cx;
    long cy;
}CSSIZE, *PCSSIZE, *LPCSSIZE;

typedef CSSIZE CSSIZEL;
typedef CSSIZE *PCSSIZEL, *LPCSSIZEL;

enum hg_tagUsbSupported
{
    /* stop scanning */
    SCAN_STOP = -1,
    /* error */
    HAVE_ERROR = -2,
    /* normal state */
    NORMAL = 0,
    /* cover open */
    OPEN_COVER = 1,
    /* out of paper */
    NO_FEED = 2,
    /* paper pick-up failed */
    FEED_IN_ERROR = 4,
    /* paper jam */
    PAPER_JAM = 8,
    /* double feed detected */
    DETECT_DOUBLE_FEED = 16,
    /* staple detected */
    DETECT_STAPLE = 32,
    /* paper skew */
    PAPER_SKEW = 64,
    /* automatic mode */
    AUTO_SCAN_MODE = 65,
    /* manual mode */
    MANAUL_SCAN_MODE = 66,
    /* counting mode */
    COUNT_MODE = 67,
    /* hardware error */
    HARDWARE_ERROR = 68,
    /* FPGA crash */
    FPGA_ERROR = 68,
    /* start */
    START_SCAN = 69,
    /* stop */
    STOP_SCAN = 70,
    /* image available */
    HAVE_IMAGE = 71,
    /* update scan parameters */
    UPDATE_SCAN_PARAMETER = 72,
    /* PC busy or error */
    PC_SCAN_BUSY_or_ERROR = 73,
    /* USB link disconnected */
    DEVICE_OFF_LINE = 74,
    /* size error */
    SIZE_ERROR = 75,
    // image acquisition timed out
    AQUIRE_IMAGE_TIMEOUT = 76,
    // fetched image count does not match the scanned page count
    LOSE_IMAGE = 77,
    // usb bulk read error
    USB_BULK_ERROR = 78,
    // v4l2 image acquisition failed
    V4L2_AQULRE_ERROR = 79,
    // image lost inside the scanner
    V4L2_IMAGE_EMPTY = 80,
    // device is sleeping
    SLEEPING = 81,
    // dog-ear (folded corner) detected
    HAVE_DOGEAR = 82,
    // auto flat-field correction in progress
    AUTO_FLATTING = 198,
    // USB not connected
    USB_DISCONNECTED = 200,
    // user clicked stop
    USER_STOP = 201,
    // auto flat-field correction finished
    AUTO_FLAT_FINISHED = 202
};

typedef enum tagtwSS
{
    None = 0,
    A4Letter = 1,
    A4 = 1,
    B5Letter = 2,
    JISB5 = 2,
    B5 = 2,
    USLetter = 3,
    USLegal = 4,
    A5 = 5,
    B4 = 6,
    ISOB4 = 6,
    B6 = 7,
    ISOB6 = 7,
    USLedger = 9,
    USExecutive = 10,
    A3 = 11,
    B3 = 12,
    ISOB3 = 12,
    A6 = 13,
    C4 = 14,
    C5 = 15,
    C6 = 16,
    _4A0 = 17,
    _2A0 = 18,
    A0 = 19,
    A1 = 20,
    A2 = 21,
    A7 = 22,
    A8 = 23,
    A9 = 24,
    A10 = 25,
    ISOB0 = 26,
    ISOB1 = 27,
    ISOB2 = 28,
    ISOB5 = 29,
    ISOB7 = 30,
    ISOB8 = 31,
    ISOB9 = 32,
    ISOB10 = 33,
    JISB0 = 34,
    JISB1 = 35,
    JISB2 = 36,
    JISB3 = 37,
    JISB4 = 38,
    JISB6 = 39,
    JISB7 = 40,
    JISB8 = 41,
    JISB9 = 42,
    JISB10 = 43,
    C0 = 44,
    C1 = 45,
    C2 = 46,
    C3 = 47,
    C7 = 48,
    C8 = 49,
    C9 = 50,
    C10 = 51,
    USStatement = 52,
    BusinessCard = 53,
    MaxSize = 54
}TwSS;

typedef enum hg_tagUsbSupported tagUsbSupported;

#pragma pack(push)
#pragma pack(4)
typedef struct tagImageInfo
{
    int Width;
    int Height;
    int bpp;
} ImageInfo;

typedef struct Scan_Rect {
    int width;
    int height;
    int x;
    int y;
}ScanRect;

/*********************************************************************************/
// basic parameters
typedef enum tagColorMode {
    BlackWhite,
    Gray,
    RGB
}ColorMode;

typedef enum tagMulti_Output {
    Unused = -1,
    All,
    ColorGray,
    ColorBw,
    GrayBw
}MultiOutput;

typedef enum tagPaper_Align :unsigned char {
    Rot0 = 0,
    Rot270 = 3
}PaperAlign;

typedef struct tagCrop_Rect
{
    int enable;
    int x;      /* x of the top-left corner of the custom crop area */
    int y;      /* y of the top-left corner of the custom crop area */
    int width;  /* width of the custom crop area */
    int height; /* height of the custom crop area */
}CropRect;

typedef struct tagScan_Side {
    int duplex;            /*0: both ; 1: front*/
    int discardBlank;      /* skip blank pages (generic) */
    int discardBlankVince; /* skip blank pages (invoices) */
    int fold;              /* fold in half */
    int switchFrontBack;   /* swap front and back sides */
}ScanSide;

typedef struct tagSkew_Detection {
    int enable;
    int level;
}SkewDetection;

typedef struct tagHhardware_Params
{
    int capturepixtype;
    int sizeDetection;
    int doubleFeedDetection;
    int bindingDetection;
    SkewDetection skewDetection;
}HardwareCaps;

// image processing parameters
typedef struct tagCcustom_Gamma
{
    int enable;
    unsigned char table[768];
    int tableLength;
}CustomGamma;

typedef struct tagFill_Hole
{
    uint8_t enable;
    int ratio;/*1~50;*/
}FillHole;

typedef enum tagColor_Filter
{
    FILTER_RED,
    FILTER_GREEN,
    FILTER_BLUE,
    FILTER_NONE,
    FILTER_ALL,
    ENHANCE_RED,
    ENHANCE_GREEN,
    ENHANCE_BLUE
}ColorFilter;

typedef enum tagSharpen_Type
{
    STNone,
    Sharpen,
    SharpenMore,
    Blur,
    BlurMore
}SharpenType;

typedef enum tagOrentation
{
    ROTATE_NONE = 0,
    ROTATE_90,
    ROTATE_180,
    ROTATE_270,
    AUTOTEXT_DETECT
}Orentation;

typedef struct tagjpegCompress {
    int enable;
    int ratio;
}JpegCompress;

typedef struct tagImage_Process
{
    int autoCrop;            /* auto-crop to paper size */
    // brightness / contrast / gamma
    int brightness;          /*1~255*/
    int contrast;            /*1~7*/
    float gamma;             /*0.1f~5.0f*/
    CustomGamma customGamma;
    // image processing
    int fillBlackRect;
    int autoDescrew;
    int refuseInflow;        /* prevent bleed-through */
    FillHole fillHole;
    ColorFilter filter;
    int colorCorrection;     /* color correction */
    int removeMorr;          /* remove moire */
    int errorExtention;      /* error diffusion */
    int nosieDetach;         /* noise optimization */
    int NosieDetachEnable;
    int textureRemove;       /* descreen */
    int indent;              /* edge inset in pixels */
    int noise;               /* denoise pixel width */
    int AutoCrop_threshold;  /* binarization threshold for auto-crop */
    bool is_convex;          /* black-border fill mode */
    SharpenType sharpenType;
    int multiOutFilterRed;   /* multi-stream output red removal */
    int answerSheetFilterRed;/* answer-sheet red removal */
    // paper feed
    Orentation orentation;
    int backRotate180;
    // other
    JpegCompress jpegCompress;
    int splitImage;
    int discardblank_percent;
}ImageProcess;


/*********************************************************************************/
typedef struct
{
    ColorMode pixelType;
    MultiOutput multiOutput;
    TwSS paperSize;
    PaperAlign paperAlign;
    CropRect cropRect;
    int resolution;
    int resolution_native;
    ScanSide scanSide;
    ImageProcess imageProcess;
    int scanCount;        /* -1: continuous scanning */
    HardwareCaps hardwareParam;
    int previewScan;
    int threshold;
    bool is_correct;
    /* saved info */
    /*std::string Caption;
    std::string SavePath;*/
}GScanCap;

/******************
** parameter-save struct **
*******************/
typedef struct tagCONFIGPARAMS
{
    /* basic tab parameters */
    int Pixtype;
    int PaperSize;
    int EnAutoCrop;
    int Resolution;
    int EnDuplex;
    int EnDiscardBlank;
    int EnDiscardBlankVince;
    int DBlank_AreaNum;
    int DBlank_DevnMax;
    int EnFold;
    int EnExchangeFrontBack;
    /* brightness/contrast tab parameters */
    float Brightness;
    int EnAutoContrast;
    float Contrast;
    float Gamma;

    /* image-processing tab parameters */
    int Filter;
    int Sharpen;
    int EnFillBlack;
    int EnAutoDescrew;
    int EnOutHole;
    int OutHoleRatio;
    int EnMultiOutPutR;
    int EnAnswerSheetR;

    /* paper-feed tab parameters */
    int EnUltrasonicDetect;
    int EnBindingDetect;
    int ScanCount;
    int Orentation;
    int EnBackRotate180;
    int EnScrewDetect;
    int ScrewDetectLevel;

    /* saved info */
    /*std::string Caption;
    std::string SavePath;*/
}CONFIGPARAMS, * PCONFIGPARAMS;

typedef struct tagDetachNoise
{
    int8_t is_detachnoise;
    int detachnoise;
}DetachNoise;

typedef struct tagHARDWAREPARAMS_39
{
    int8_t capturepixtype;
    int8_t en_doublefeed;
    int8_t en_stapledetect;
    int8_t en_skrewdetect;
    int8_t skrewdetectlevel;
    int lowpowermode;
#ifdef UV
    byte en_uv;
#endif
}HardwareCaps_39;

struct GScanCap_3399
{
    uint8_t papertype;                  /**< the current paper source ADF or Flatbed*/
    PaperAlign paperAlign;
    uint8_t en_sizecheck;               /**< size detection */
    float imageRotateDegree;
    uint8_t is_duplex;                  /**< True to use duplex false for simplex, ignored if flatbed*/
    uint8_t en_fold;                    /**< fold in half */
    int pixtype;                        /**< type of pixels to transfer image as */
    int automaticcolor;                 /**< automatic color detection */
    int automaticcolortype;             /**< upload type for non-color pages when automatic color detection is on */
    //ScanRect scanrect;
    float resolution_dst;               /**< horizontal resolution */
    float resolution_native;
    float gamma;                        /**< Gamma */
    float contrast;                     /**< Contrast */
    float brightness;                   /**< Brightness */
    float threshold;                    /**< Threshold */
    uint8_t is_autocontrast;            /**< auto contrast */
    uint8_t is_autocrop;                /**< auto crop */
    uint8_t is_autodiscradblank_normal; /**< auto-discard blank pages (generic) */
    int discardblank_percent;           /**< blank-page skip threshold */
    uint8_t is_autodiscradblank_vince;  /**< auto-discard blank pages (invoices) */
    uint8_t is_switchfrontback;         /**< swap front and back sides */
    uint8_t autodescrew;                /**< auto deskew */
    uint8_t multi_output_red;           /* multi-stream output */
    uint8_t hsvcorrect;                 /**< answer-sheet red removal */
    uint8_t filter;                     /**< color drop-out */
    uint8_t sharpen;
    uint8_t enhance_color;              /**< color enhancement */
    uint8_t fillbackground;             /**< fill black border */
    bool is_convex;                     /**< black-border fill mode: true = convex-polygon fill, false = concave-polygon fill; default true */
    int noise;                          /**< denoise width: suppresses background vertical-stripe interference up to noise pixels wide; default 40 */
    int indent;                         /**< contour inset: when cropping, deskewing or black-background filling, inset the detected paper contour by indent pixels; default 5 */
    int AutoCrop_threshold;             /**< binarization threshold for auto-crop, range (0, 255); default 40 */
    unsigned short scannum;             /**< number of pages to scan */
    uint8_t is_backrotate180;           /**< rotate back side by 180 degrees */
    uint8_t is_dogeardetection;         /**< dog-ear detection */
    HardwareCaps_39 hardwarecaps;       /**< hardware scan parameters */
    FillHole fillhole;
    DetachNoise detachnoise;            /**< black-and-white denoise */
    uint8_t is_autotext;                /**< automatic text-orientation detection */
    bool isfillcolor;                   /**< color fill for auto-crop */
    int refuseInflow;                   /**< prevent bleed-through */
    int colorCorrection;                /**< color correction */
    int removeMorr;                     /**< remove moire */
    int errorExtention;                 /**< error diffusion */
    int textureRemove;                  /**< descreen */
    int splitImage;                     /**< image split */
    CropRect cropRect;                  /**< custom crop */
    MultiOutput multiOutput;            /**< multi-stream output */
    bool normalCrop;                    /**< auto-crop for dark originals */
    uint32_t reserve[1024];             /**< 4096 bytes reserved for protocol extension */
};
#pragma pack(pop)

/*typedef struct tagCONFIGINFO
{
    std::string Caption;
    std::string SavePath;
}CONFIGINFO, * PCONFIGINFO;*/

#ifdef __cplusplus
}
#endif

#endif
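Since the structs above sit under #pragma pack(4) and the header is shared across the USB protocol boundary, compile-time layout checks are cheap insurance against accidental padding drift; a sketch assuming a C++11 static_assert:

#include <cstdint>

// ImageInfo is exactly three 4-byte ints under pack(4).
static_assert(sizeof(ImageInfo) == 12, "ImageInfo layout changed");
// The reserve[1024] tail of GScanCap_3399 keeps 4096 bytes for protocol extension.
static_assert(sizeof(uint32_t) * 1024 == 4096, "reserved area is 4096 bytes");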
@@ -50,7 +50,6 @@
#endif

#include "opencv2/core/cvdef.h"
#include "opencv2/core/version.hpp"
#include "opencv2/core/base.hpp"
#include "opencv2/core/cvstd.hpp"
#include "opencv2/core/traits.hpp"
@@ -68,12 +67,15 @@
    @defgroup core_c_glue Connections with C++
@}
@defgroup core_array Operations on arrays
@defgroup core_async Asynchronous API
@defgroup core_xml XML/YAML Persistence
@defgroup core_cluster Clustering
@defgroup core_utils Utility and system functions and macros
@{
    @defgroup core_logging Logging facilities
    @defgroup core_utils_sse SSE utilities
    @defgroup core_utils_neon NEON utilities
    @defgroup core_utils_vsx VSX utilities
    @defgroup core_utils_softfloat Softfloat support
    @defgroup core_utils_samples Utility functions for OpenCV samples
@}
@@ -199,6 +201,9 @@ enum CovarFlags {
    COVAR_COLS = 16
};

//! @addtogroup core_cluster
//! @{

//! k-Means flags
enum KmeansFlags {
    /** Select random initial centers in each attempt.*/
@@ -212,6 +217,8 @@ enum KmeansFlags {
    KMEANS_USE_INITIAL_LABELS = 1
};

//! @} core_cluster

//! type of line
enum LineTypes {
    FILLED = -1,
@@ -233,12 +240,16 @@ enum HersheyFonts {
    FONT_ITALIC = 16 //!< flag for italic font
};

//! @addtogroup core_array
//! @{

enum ReduceTypes { REDUCE_SUM = 0, //!< the output is the sum of all rows/columns of the matrix.
                   REDUCE_AVG = 1, //!< the output is the mean vector of all rows/columns of the matrix.
                   REDUCE_MAX = 2, //!< the output is the maximum (column/row-wise) of all rows/columns of the matrix.
                   REDUCE_MIN = 3  //!< the output is the minimum (column/row-wise) of all rows/columns of the matrix.
                 };

//! @} core_array

/** @brief Swaps two matrices
*/
@@ -311,9 +322,9 @@ if src was not a ROI, use borderType | #BORDER_ISOLATED.
@param src Source image.
@param dst Destination image of the same type as src and the size Size(src.cols+left+right,
src.rows+top+bottom) .
@param top
@param bottom
@param left
@param top the top pixels
@param bottom the bottom pixels
@param left the left pixels
@param right Parameter specifying how many pixels in each direction from the source image rectangle
to extrapolate. For example, top=1, bottom=1, left=1, right=1 mean that 1 pixel-wide border needs
to be built.
@@ -1612,7 +1623,9 @@ elements.
CV_EXPORTS_W bool checkRange(InputArray a, bool quiet = true, CV_OUT Point* pos = 0,
                            double minVal = -DBL_MAX, double maxVal = DBL_MAX);

/** @brief converts NaN's to the given number
/** @brief converts NaNs to the given number
@param a input/output matrix (CV_32F type).
@param val value to convert the NaNs
*/
CV_EXPORTS_W void patchNaNs(InputOutputArray a, double val = 0);
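For context, a short patchNaNs usage sketch (standard OpenCV API; the matrix contents are illustrative):

#include <opencv2/core.hpp>
#include <cmath>

int main() {
    cv::Mat m = (cv::Mat_<float>(1, 3) << 1.f, std::nanf(""), 3.f);
    cv::patchNaNs(m, 0.0);  // m becomes [1, 0, 3]
    return 0;
}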
@@ -0,0 +1,105 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_CORE_ASYNC_HPP
#define OPENCV_CORE_ASYNC_HPP

#include <opencv2/core/mat.hpp>

#ifdef CV_CXX11
//#include <future>
#include <chrono>
#endif

namespace cv {

/** @addtogroup core_async

@{
*/


/** @brief Returns result of asynchronous operations

Object has attached asynchronous state.
Assignment operator doesn't clone asynchronous state (it is shared between all instances).

Result can be fetched via get() method only once.

*/
class CV_EXPORTS_W AsyncArray
{
public:
    ~AsyncArray() CV_NOEXCEPT;
    CV_WRAP AsyncArray() CV_NOEXCEPT;
    AsyncArray(const AsyncArray& o) CV_NOEXCEPT;
    AsyncArray& operator=(const AsyncArray& o) CV_NOEXCEPT;
    CV_WRAP void release() CV_NOEXCEPT;

    /** Fetch the result.
    @param[out] dst destination array

    Waits for result until container has valid result.
    Throws exception if exception was stored as a result.

    Throws exception on invalid container state.

    @note Result or stored exception can be fetched only once.
    */
    CV_WRAP void get(OutputArray dst) const;

    /** Retrieving the result with timeout
    @param[out] dst destination array
    @param[in] timeoutNs timeout in nanoseconds, -1 for infinite wait

    @returns true if result is ready, false if the timeout has expired

    @note Result or stored exception can be fetched only once.
    */
    bool get(OutputArray dst, int64 timeoutNs) const;

    CV_WRAP inline
    bool get(OutputArray dst, double timeoutNs) const { return get(dst, (int64)timeoutNs); }

    bool wait_for(int64 timeoutNs) const;

    CV_WRAP inline
    bool wait_for(double timeoutNs) const { return wait_for((int64)timeoutNs); }

    CV_WRAP bool valid() const CV_NOEXCEPT;

#ifdef CV_CXX11
    inline AsyncArray(AsyncArray&& o) { p = o.p; o.p = NULL; }
    inline AsyncArray& operator=(AsyncArray&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }

    template<typename _Rep, typename _Period>
    inline bool get(OutputArray dst, const std::chrono::duration<_Rep, _Period>& timeout)
    {
        return get(dst, (int64)(std::chrono::nanoseconds(timeout).count()));
    }

    template<typename _Rep, typename _Period>
    inline bool wait_for(const std::chrono::duration<_Rep, _Period>& timeout)
    {
        return wait_for((int64)(std::chrono::nanoseconds(timeout).count()));
    }

#if 0
    std::future<Mat> getFutureMat() const;
    std::future<UMat> getFutureUMat() const;
#endif
#endif


    // PImpl
    struct Impl; friend struct Impl;
    inline void* _getImpl() const CV_NOEXCEPT { return p; }
protected:
    Impl* p;
};


//! @}
} // namespace
#endif // OPENCV_CORE_ASYNC_HPP
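A brief usage sketch: producing a ready AsyncArray through AsyncPromise (declared in detail/async_promise.hpp, used the same way by the bindings utilities later in this commit) and fetching it with the chrono overload above:

#include <opencv2/core/async.hpp>
#include <opencv2/core/detail/async_promise.hpp>
#include <chrono>

int main() {
    cv::AsyncPromise promise;
    promise.setValue(cv::Mat::eye(3, 3, CV_32F));   // result is ready immediately
    cv::AsyncArray result = promise.getArrayResult();

    cv::Mat dst;
    if (result.get(dst, std::chrono::milliseconds(100)))  // true: fetched in time
        CV_Assert(dst.rows == 3);
    return 0;
}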
@@ -188,7 +188,7 @@ enum NormTypes {
norm = \forkthree
{ \| \texttt{src1} \| _{L_2} ^{2} = \sum_I \texttt{src1}(I)^2} {if \(\texttt{normType} = \texttt{NORM_L2SQR}\)}
{ \| \texttt{src1} - \texttt{src2} \| _{L_2} ^{2} = \sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2 }{if \(\texttt{normType} = \texttt{NORM_L2SQR}\) }
{ \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) }
{ \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2SQR}\) }
\f]
*/
NORM_L2SQR = 5,
@@ -326,8 +326,8 @@ CV_INLINE CV_NORETURN void errorNoReturn(int _code, const String& _err, const ch

// In practice, some macro are not processed correctly (noreturn is not detected).
// We need to use simplified definition for them.
#define CV_Error(...) do { abort(); } while (0)
#define CV_Error_( code, args ) do { cv::format args; abort(); } while (0)
#define CV_Error(code, msg) do { (void)(code); (void)(msg); abort(); } while (0)
#define CV_Error_(code, args) do { (void)(code); (void)(cv::format args); abort(); } while (0)
#define CV_Assert( expr ) do { if (!(expr)) abort(); } while (0)
#define CV_ErrorNoReturn CV_Error
#define CV_ErrorNoReturn_ CV_Error_
@@ -587,6 +587,21 @@ _AccTp normInf(const _Tp* a, const _Tp* b, int n)
*/
CV_EXPORTS_W float cubeRoot(float val);

/** @overload

cubeRoot with argument of `double` type calls `std::cbrt(double)` (C++11) or falls back on `pow()` for C++98 compilation mode.
*/
static inline
double cubeRoot(double val)
{
#ifdef CV_CXX11
    return std::cbrt(val);
#else
    double v = pow(abs(val), 1/3.); // pow doesn't support negative inputs with fractional exponents
    return val >= 0 ? v : -v;
#endif
}

/** @brief Calculates the angle of a 2D vector in degrees.

The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured
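A worked check of the fallback branch: pow(|-8|, 1/3) = 2 and the sign is restored afterwards, so cubeRoot(-8) = -2. A sketch:

#include <cassert>
#include <cmath>

int main() {
    // Perfect cubes, compared with a tolerance to stay safe in floating point.
    assert(std::fabs(cubeRoot(-8.0) - (-2.0)) < 1e-12);
    assert(std::fabs(cubeRoot(27.0) - 3.0)    < 1e-12);
    return 0;
}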
@@ -0,0 +1,170 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_CORE_BINDINGS_UTILS_HPP
#define OPENCV_CORE_BINDINGS_UTILS_HPP

#include <opencv2/core/async.hpp>
#include <opencv2/core/detail/async_promise.hpp>
#include <opencv2/core/utils/logger.hpp>

#include <stdexcept>

namespace cv { namespace utils {
//! @addtogroup core_utils
//! @{

CV_EXPORTS_W String dumpInputArray(InputArray argument);

CV_EXPORTS_W String dumpInputArrayOfArrays(InputArrayOfArrays argument);

CV_EXPORTS_W String dumpInputOutputArray(InputOutputArray argument);

CV_EXPORTS_W String dumpInputOutputArrayOfArrays(InputOutputArrayOfArrays argument);

CV_WRAP static inline
String dumpBool(bool argument)
{
    return (argument) ? String("Bool: True") : String("Bool: False");
}

CV_WRAP static inline
String dumpInt(int argument)
{
    return cv::format("Int: %d", argument);
}

CV_WRAP static inline
String dumpSizeT(size_t argument)
{
    std::ostringstream oss("size_t: ", std::ios::ate);
    oss << argument;
    return oss.str();
}

CV_WRAP static inline
String dumpFloat(float argument)
{
    return cv::format("Float: %.2f", argument);
}

CV_WRAP static inline
String dumpDouble(double argument)
{
    return cv::format("Double: %.2f", argument);
}

CV_WRAP static inline
String dumpCString(const char* argument)
{
    return cv::format("String: %s", argument);
}

CV_WRAP static inline
String dumpString(const String& argument)
{
    return cv::format("String: %s", argument.c_str());
}

CV_WRAP static inline
String testOverloadResolution(int value, const Point& point = Point(42, 24))
{
    return format("overload (int=%d, point=(x=%d, y=%d))", value, point.x,
                  point.y);
}

CV_WRAP static inline
String testOverloadResolution(const Rect& rect)
{
    return format("overload (rect=(x=%d, y=%d, w=%d, h=%d))", rect.x, rect.y,
                  rect.width, rect.height);
}

CV_WRAP static inline
String dumpRect(const Rect& argument)
{
    return format("rect: (x=%d, y=%d, w=%d, h=%d)", argument.x, argument.y,
                  argument.width, argument.height);
}

CV_WRAP static inline
String dumpTermCriteria(const TermCriteria& argument)
{
    return format("term_criteria: (type=%d, max_count=%d, epsilon=%lf",
                  argument.type, argument.maxCount, argument.epsilon);
}

CV_WRAP static inline
String dumpRotatedRect(const RotatedRect& argument)
{
    return format("rotated_rect: (c_x=%f, c_y=%f, w=%f, h=%f, a=%f)",
                  argument.center.x, argument.center.y, argument.size.width,
                  argument.size.height, argument.angle);
}

CV_WRAP static inline
String dumpRange(const Range& argument)
{
    if (argument == Range::all())
    {
        return "range: all";
    }
    else
    {
        return format("range: (s=%d, e=%d)", argument.start, argument.end);
    }
}

CV_WRAP static inline
void testRaiseGeneralException()
{
    throw std::runtime_error("exception text");
}

CV_WRAP static inline
AsyncArray testAsyncArray(InputArray argument)
{
    AsyncPromise p;
    p.setValue(argument);
    return p.getArrayResult();
}

CV_WRAP static inline
AsyncArray testAsyncException()
{
    AsyncPromise p;
    try
    {
        CV_Error(Error::StsOk, "Test: Generated async error");
    }
    catch (const cv::Exception& e)
    {
        p.setException(e);
    }
    return p.getArrayResult();
}

//! @} // core_utils
} // namespace cv::utils

//! @cond IGNORED

CV_WRAP static inline
int setLogLevel(int level)
{
    // NB: Binding generators doesn't work with enums properly yet, so we define separate overload here
    return cv::utils::logging::setLogLevel((cv::utils::logging::LogLevel)level);
}

CV_WRAP static inline
int getLogLevel()
{
    return cv::utils::logging::getLogLevel();
}

//! @endcond IGNORED

} // namespaces cv / utils

#endif // OPENCV_CORE_BINDINGS_UTILS_HPP
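A quick sketch of the dump helpers in use (expected output inferred from the format strings above):

#include <iostream>
#include <opencv2/core/bindings_utils.hpp>

int main() {
    std::cout << cv::utils::dumpBool(true) << '\n';   // Bool: True
    std::cout << cv::utils::dumpInt(42) << '\n';      // Int: 42
    std::cout << cv::utils::dumpRect(cv::Rect(1, 2, 3, 4)) << '\n';
                                                      // rect: (x=1, y=2, w=3, h=4)
    return 0;
}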
@@ -63,12 +63,13 @@ struct CheckContext {
#define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__)
#define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \
    static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \
        { CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, message, p1_str, p2_str }
        { CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, "" message, "" p1_str, "" p2_str }

CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v1, const size_t v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_<int> v1, const Size_<int> v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx);
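The change above prefixes each string parameter with an empty literal; `"" x` compiles only when `x` is itself a string literal (adjacent-literal concatenation), so accidentally passing a runtime pointer becomes a compile error. A tiny illustration of the idiom (not from the commit):

// "" msg compiles only when msg is a string literal.
#define REQUIRE_LITERAL(msg) ("" msg)

const char* ok = REQUIRE_LITERAL("fine");   // "" "fine" concatenates
// const char* bad = REQUIRE_LITERAL(ok);   // error: cannot concatenate a pointer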
@@ -77,6 +78,8 @@ CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& c
CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_<int> v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_auto(const std::string& v1, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx);
CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx);
@@ -53,7 +53,7 @@
   which is incompatible with C

   It is OK to disable it because we only extend few plain structures with
   C++ construrtors for simpler interoperability with C++ API of the library
   C++ constructors for simpler interoperability with C++ API of the library
*/
#  pragma warning(disable:4190)
# elif defined __clang__ && __clang_major__ >= 3
@@ -579,7 +579,7 @@ CvNArrayIterator;
#define CV_NO_CN_CHECK      2
#define CV_NO_SIZE_CHECK    4

/** initializes iterator that traverses through several arrays simulteneously
/** initializes iterator that traverses through several arrays simultaneously
   (the function together with cvNextArraySlice is used for
    N-ari element-wise operations) */
CVAPI(int) cvInitNArrayIterator( int count, CvArr** arrs,
@@ -1309,7 +1309,7 @@ CVAPI(void) cvMulTransposed( const CvArr* src, CvArr* dst, int order,
                             const CvArr* delta CV_DEFAULT(NULL),
                             double scale CV_DEFAULT(1.) );

/** Tranposes matrix. Square matrices can be transposed in-place */
/** Transposes matrix. Square matrices can be transposed in-place */
CVAPI(void) cvTranspose( const CvArr* src, CvArr* dst );
#define cvT cvTranspose
@@ -126,7 +126,7 @@ public:
    GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
    GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());

    //! constucts GpuMat and fills it with the specified value _s
    //! constructs GpuMat and fills it with the specified value _s
    GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
    GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());
@@ -101,6 +101,20 @@ namespace cv { namespace cuda
            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
            cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
        }

        template<class T> inline void createTextureObjectPitch2D(cudaTextureObject_t* tex, PtrStepSz<T>& img, const cudaTextureDesc& texDesc)
        {
            cudaResourceDesc resDesc;
            memset(&resDesc, 0, sizeof(resDesc));
            resDesc.resType = cudaResourceTypePitch2D;
            resDesc.res.pitch2D.devPtr = static_cast<void*>(img.ptr());
            resDesc.res.pitch2D.height = img.rows;
            resDesc.res.pitch2D.width = img.cols;
            resDesc.res.pitch2D.pitchInBytes = img.step;
            resDesc.res.pitch2D.desc = cudaCreateChannelDesc<T>();

            cudaSafeCall( cudaCreateTextureObject(tex, &resDesc, &texDesc, NULL) );
        }
    }
}}
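A host-side sketch of the new helper (assumes the enclosing namespace is cv::cuda::device as in OpenCV's CUDA common header, and that img is a valid device PtrStepSz<float>):

cudaTextureDesc texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.addressMode[0] = cudaAddressModeClamp;
texDesc.addressMode[1] = cudaAddressModeClamp;
texDesc.filterMode     = cudaFilterModePoint;
texDesc.readMode       = cudaReadModeElementType;

cudaTextureObject_t tex = 0;
cv::cuda::device::createTextureObjectPitch2D<float>(&tex, img, texDesc);
// ... launch kernels that sample tex2D<float>(tex, x, y) ...
cudaSafeCall( cudaDestroyTextureObject(tex) );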
@@ -106,8 +106,8 @@ namespace cv

        size_t step;

        __CV_CUDA_HOST_DEVICE__       T* ptr(int y = 0)       { return (      T*)( (      char*)DevPtr<T>::data + y * step); }
        __CV_CUDA_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
        __CV_CUDA_HOST_DEVICE__       T* ptr(int y = 0)       { return (      T*)( (      char*)(((DevPtr<T>*)this)->data) + y * step); }
        __CV_CUDA_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)(((DevPtr<T>*)this)->data) + y * step); }

        __CV_CUDA_HOST_DEVICE__       T& operator ()(int y, int x)       { return ptr(y)[x]; }
        __CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
@@ -72,7 +72,7 @@
#  define CV_AVX 1
#endif
#ifdef CV_CPU_COMPILE_FP16
#  if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
#  if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
#    include <arm_neon.h>
#  else
#    include <immintrin.h>
@@ -87,15 +87,53 @@
#  include <immintrin.h>
#  define CV_AVX_512F 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_COMMON
#  define CV_AVX512_COMMON 1
#  define CV_AVX_512CD 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_KNL
#  define CV_AVX512_KNL 1
#  define CV_AVX_512ER 1
#  define CV_AVX_512PF 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_KNM
#  define CV_AVX512_KNM 1
#  define CV_AVX_5124FMAPS 1
#  define CV_AVX_5124VNNIW 1
#  define CV_AVX_512VPOPCNTDQ 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_SKX
#  include <immintrin.h>
#  define CV_AVX512_SKX 1
#  define CV_AVX_512VL 1
#  define CV_AVX_512BW 1
#  define CV_AVX_512DQ 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_CNL
#  define CV_AVX512_CNL 1
#  define CV_AVX_512IFMA 1
#  define CV_AVX_512VBMI 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_CLX
#  define CV_AVX512_CLX 1
#  define CV_AVX_512VNNI 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_ICL
#  define CV_AVX512_ICL 1
#  undef CV_AVX_512IFMA
#  define CV_AVX_512IFMA 1
#  undef CV_AVX_512VBMI
#  define CV_AVX_512VBMI 1
#  undef CV_AVX_512VNNI
#  define CV_AVX_512VNNI 1
#  define CV_AVX_512VBMI2 1
#  define CV_AVX_512BITALG 1
#  define CV_AVX_512VPOPCNTDQ 1
#endif
#ifdef CV_CPU_COMPILE_FMA3
#  define CV_FMA3 1
#endif

#if defined _WIN32 && defined(_M_ARM)
#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
#  include <Intrin.h>
#  include <arm_neon.h>
#  define CV_NEON 1
@@ -120,6 +158,16 @@
#  define CV_VSX3 1
#endif

#ifdef CV_CPU_COMPILE_MSA
#  include "hal/msa_macros.h"
#  define CV_MSA 1
#endif

#ifdef __EMSCRIPTEN__
#  define CV_WASM_SIMD 1
#  include <wasm_simd128.h>
#endif

#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__

#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
@@ -153,7 +201,7 @@ struct VZeroUpperGuard {
#  define CV_MMX 1
#  define CV_SSE 1
#  define CV_SSE2 1
#elif defined _WIN32 && defined(_M_ARM)
#elif defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
#  include <Intrin.h>
#  include <arm_neon.h>
#  define CV_NEON 1
@@ -168,6 +216,11 @@ struct VZeroUpperGuard {
#  define CV_VSX 1
#endif

#ifdef __F16C__
#  include <immintrin.h>
#  define CV_FP16 1
#endif

#endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code)

@@ -223,9 +276,10 @@ struct VZeroUpperGuard {
#ifndef CV_AVX_512ER
#  define CV_AVX_512ER 0
#endif
#ifndef CV_AVX_512IFMA512
#  define CV_AVX_512IFMA512 0
#ifndef CV_AVX_512IFMA
#  define CV_AVX_512IFMA 0
#endif
#define CV_AVX_512IFMA512 CV_AVX_512IFMA // deprecated
#ifndef CV_AVX_512PF
#  define CV_AVX_512PF 0
#endif
@@ -235,9 +289,45 @@ struct VZeroUpperGuard {
#ifndef CV_AVX_512VL
#  define CV_AVX_512VL 0
#endif
#ifndef CV_AVX_5124FMAPS
#  define CV_AVX_5124FMAPS 0
#endif
#ifndef CV_AVX_5124VNNIW
#  define CV_AVX_5124VNNIW 0
#endif
#ifndef CV_AVX_512VPOPCNTDQ
#  define CV_AVX_512VPOPCNTDQ 0
#endif
#ifndef CV_AVX_512VNNI
#  define CV_AVX_512VNNI 0
#endif
#ifndef CV_AVX_512VBMI2
#  define CV_AVX_512VBMI2 0
#endif
#ifndef CV_AVX_512BITALG
#  define CV_AVX_512BITALG 0
#endif
#ifndef CV_AVX512_COMMON
#  define CV_AVX512_COMMON 0
#endif
#ifndef CV_AVX512_KNL
#  define CV_AVX512_KNL 0
#endif
#ifndef CV_AVX512_KNM
#  define CV_AVX512_KNM 0
#endif
#ifndef CV_AVX512_SKX
#  define CV_AVX512_SKX 0
#endif
#ifndef CV_AVX512_CNL
#  define CV_AVX512_CNL 0
#endif
#ifndef CV_AVX512_CLX
#  define CV_AVX512_CLX 0
#endif
#ifndef CV_AVX512_ICL
#  define CV_AVX512_ICL 0
#endif

#ifndef CV_NEON
#  define CV_NEON 0
@@ -250,3 +340,11 @@ struct VZeroUpperGuard {
#ifndef CV_VSX3
#  define CV_VSX3 0
#endif

#ifndef CV_MSA
#  define CV_MSA 0
#endif

#ifndef CV_WASM_SIMD
#  define CV_WASM_SIMD 0
#endif
@@ -252,6 +252,69 @@
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...)  CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))

#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_COMMON
#  define CV_TRY_AVX512_COMMON 1
#  define CV_CPU_FORCE_AVX512_COMMON 1
#  define CV_CPU_HAS_SUPPORT_AVX512_COMMON 1
#  define CV_CPU_CALL_AVX512_COMMON(fn, args) return (cpu_baseline::fn args)
#  define CV_CPU_CALL_AVX512_COMMON_(fn, args) return (opt_AVX512_COMMON::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_COMMON
#  define CV_TRY_AVX512_COMMON 1
#  define CV_CPU_FORCE_AVX512_COMMON 0
#  define CV_CPU_HAS_SUPPORT_AVX512_COMMON (cv::checkHardwareSupport(CV_CPU_AVX512_COMMON))
#  define CV_CPU_CALL_AVX512_COMMON(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args)
#  define CV_CPU_CALL_AVX512_COMMON_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args)
#else
#  define CV_TRY_AVX512_COMMON 0
#  define CV_CPU_FORCE_AVX512_COMMON 0
#  define CV_CPU_HAS_SUPPORT_AVX512_COMMON 0
#  define CV_CPU_CALL_AVX512_COMMON(fn, args)
#  define CV_CPU_CALL_AVX512_COMMON_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX512_COMMON(fn, args, mode, ...)  CV_CPU_CALL_AVX512_COMMON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))

#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNL
#  define CV_TRY_AVX512_KNL 1
#  define CV_CPU_FORCE_AVX512_KNL 1
#  define CV_CPU_HAS_SUPPORT_AVX512_KNL 1
#  define CV_CPU_CALL_AVX512_KNL(fn, args) return (cpu_baseline::fn args)
#  define CV_CPU_CALL_AVX512_KNL_(fn, args) return (opt_AVX512_KNL::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNL
#  define CV_TRY_AVX512_KNL 1
#  define CV_CPU_FORCE_AVX512_KNL 0
#  define CV_CPU_HAS_SUPPORT_AVX512_KNL (cv::checkHardwareSupport(CV_CPU_AVX512_KNL))
#  define CV_CPU_CALL_AVX512_KNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args)
#  define CV_CPU_CALL_AVX512_KNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args)
#else
#  define CV_TRY_AVX512_KNL 0
#  define CV_CPU_FORCE_AVX512_KNL 0
#  define CV_CPU_HAS_SUPPORT_AVX512_KNL 0
#  define CV_CPU_CALL_AVX512_KNL(fn, args)
#  define CV_CPU_CALL_AVX512_KNL_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNL(fn, args, mode, ...)  CV_CPU_CALL_AVX512_KNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))

#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNM
#  define CV_TRY_AVX512_KNM 1
#  define CV_CPU_FORCE_AVX512_KNM 1
#  define CV_CPU_HAS_SUPPORT_AVX512_KNM 1
#  define CV_CPU_CALL_AVX512_KNM(fn, args) return (cpu_baseline::fn args)
#  define CV_CPU_CALL_AVX512_KNM_(fn, args) return (opt_AVX512_KNM::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNM
#  define CV_TRY_AVX512_KNM 1
#  define CV_CPU_FORCE_AVX512_KNM 0
#  define CV_CPU_HAS_SUPPORT_AVX512_KNM (cv::checkHardwareSupport(CV_CPU_AVX512_KNM))
#  define CV_CPU_CALL_AVX512_KNM(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args)
#  define CV_CPU_CALL_AVX512_KNM_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args)
#else
#  define CV_TRY_AVX512_KNM 0
#  define CV_CPU_FORCE_AVX512_KNM 0
#  define CV_CPU_HAS_SUPPORT_AVX512_KNM 0
#  define CV_CPU_CALL_AVX512_KNM(fn, args)
#  define CV_CPU_CALL_AVX512_KNM_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNM(fn, args, mode, ...)  CV_CPU_CALL_AVX512_KNM(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))

#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX
#  define CV_TRY_AVX512_SKX 1
#  define CV_CPU_FORCE_AVX512_SKX 1
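Each block above follows the same three-way pattern: when the target is in the compile baseline the call resolves statically to cpu_baseline; when it is a dispatched target the call is guarded by a runtime checkHardwareSupport test; otherwise the macro expands to nothing and the chain falls through. A condensed sketch of what a dispatched call site effectively does (the function names are hypothetical):

void filter_dispatch(const float* src, float* dst, int n)
{
    // Dispatched build: runtime feature check, then the optimized variant...
    if (cv::checkHardwareSupport(CV_CPU_AVX512_SKX))
        return opt_AVX512_SKX::filter(src, dst, n);  // hypothetical optimized impl
    // ...otherwise fall through to the baseline implementation.
    return cpu_baseline::filter(src, dst, n);        // hypothetical baseline impl
}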
@@ -273,6 +336,69 @@
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...)  CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))

#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CNL
#  define CV_TRY_AVX512_CNL 1
#  define CV_CPU_FORCE_AVX512_CNL 1
#  define CV_CPU_HAS_SUPPORT_AVX512_CNL 1
#  define CV_CPU_CALL_AVX512_CNL(fn, args) return (cpu_baseline::fn args)
#  define CV_CPU_CALL_AVX512_CNL_(fn, args) return (opt_AVX512_CNL::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CNL
#  define CV_TRY_AVX512_CNL 1
#  define CV_CPU_FORCE_AVX512_CNL 0
#  define CV_CPU_HAS_SUPPORT_AVX512_CNL (cv::checkHardwareSupport(CV_CPU_AVX512_CNL))
#  define CV_CPU_CALL_AVX512_CNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args)
#  define CV_CPU_CALL_AVX512_CNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args)
#else
#  define CV_TRY_AVX512_CNL 0
#  define CV_CPU_FORCE_AVX512_CNL 0
#  define CV_CPU_HAS_SUPPORT_AVX512_CNL 0
#  define CV_CPU_CALL_AVX512_CNL(fn, args)
#  define CV_CPU_CALL_AVX512_CNL_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX512_CNL(fn, args, mode, ...)  CV_CPU_CALL_AVX512_CNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))

#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CLX
#  define CV_TRY_AVX512_CLX 1
#  define CV_CPU_FORCE_AVX512_CLX 1
#  define CV_CPU_HAS_SUPPORT_AVX512_CLX 1
#  define CV_CPU_CALL_AVX512_CLX(fn, args) return (cpu_baseline::fn args)
#  define CV_CPU_CALL_AVX512_CLX_(fn, args) return (opt_AVX512_CLX::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CLX
#  define CV_TRY_AVX512_CLX 1
#  define CV_CPU_FORCE_AVX512_CLX 0
#  define CV_CPU_HAS_SUPPORT_AVX512_CLX (cv::checkHardwareSupport(CV_CPU_AVX512_CLX))
#  define CV_CPU_CALL_AVX512_CLX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args)
#  define CV_CPU_CALL_AVX512_CLX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args)
#else
#  define CV_TRY_AVX512_CLX 0
#  define CV_CPU_FORCE_AVX512_CLX 0
#  define CV_CPU_HAS_SUPPORT_AVX512_CLX 0
#  define CV_CPU_CALL_AVX512_CLX(fn, args)
#  define CV_CPU_CALL_AVX512_CLX_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX512_CLX(fn, args, mode, ...)  CV_CPU_CALL_AVX512_CLX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))

#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_ICL
#  define CV_TRY_AVX512_ICL 1
#  define CV_CPU_FORCE_AVX512_ICL 1
#  define CV_CPU_HAS_SUPPORT_AVX512_ICL 1
#  define CV_CPU_CALL_AVX512_ICL(fn, args) return (cpu_baseline::fn args)
#  define CV_CPU_CALL_AVX512_ICL_(fn, args) return (opt_AVX512_ICL::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_ICL
#  define CV_TRY_AVX512_ICL 1
#  define CV_CPU_FORCE_AVX512_ICL 0
#  define CV_CPU_HAS_SUPPORT_AVX512_ICL (cv::checkHardwareSupport(CV_CPU_AVX512_ICL))
#  define CV_CPU_CALL_AVX512_ICL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args)
#  define CV_CPU_CALL_AVX512_ICL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args)
#else
#  define CV_TRY_AVX512_ICL 0
#  define CV_CPU_FORCE_AVX512_ICL 0
#  define CV_CPU_HAS_SUPPORT_AVX512_ICL 0
#  define CV_CPU_CALL_AVX512_ICL(fn, args)
#  define CV_CPU_CALL_AVX512_ICL_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX512_ICL(fn, args, mode, ...)  CV_CPU_CALL_AVX512_ICL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))

#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
#  define CV_TRY_NEON 1
#  define CV_CPU_FORCE_NEON 1
@ -294,6 +420,27 @@
|
|||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_MSA
|
||||
# define CV_TRY_MSA 1
|
||||
# define CV_CPU_FORCE_MSA 1
|
||||
# define CV_CPU_HAS_SUPPORT_MSA 1
|
||||
# define CV_CPU_CALL_MSA(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_MSA_(fn, args) return (opt_MSA::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_MSA
|
||||
# define CV_TRY_MSA 1
|
||||
# define CV_CPU_FORCE_MSA 0
|
||||
# define CV_CPU_HAS_SUPPORT_MSA (cv::checkHardwareSupport(CV_CPU_MSA))
|
||||
# define CV_CPU_CALL_MSA(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args)
|
||||
# define CV_CPU_CALL_MSA_(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args)
|
||||
#else
|
||||
# define CV_TRY_MSA 0
|
||||
# define CV_CPU_FORCE_MSA 0
|
||||
# define CV_CPU_HAS_SUPPORT_MSA 0
|
||||
# define CV_CPU_CALL_MSA(fn, args)
|
||||
# define CV_CPU_CALL_MSA_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_MSA(fn, args, mode, ...) CV_CPU_CALL_MSA(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX
|
||||
# define CV_TRY_VSX 1
|
||||
# define CV_CPU_FORCE_VSX 1
|
|
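Reviewer note: each feature gets the same three-macro pattern; CV_CPU_CALL_<FEAT> early-returns into the optimized namespace when the feature is usable, and the __CV_CPU_DISPATCH_CHAIN_<FEAT> macros string those checks together. A minimal sketch of how a dispatched entry point uses them (the function name and namespaces below are hypothetical, not part of this commit):

    // sum8u is a made-up example; opt_AVX512_CLX / cpu_baseline mirror the
    // namespaces the OpenCV dispatcher generates for compiled variants.
    namespace opt_AVX512_CLX { int sum8u(const unsigned char* p, int n); }
    namespace cpu_baseline   { int sum8u(const unsigned char* p, int n); }
    int sum8u(const unsigned char* p, int n)
    {
        CV_CPU_CALL_AVX512_CLX(sum8u, (p, n)); // early-returns if supported at run time
        return cpu_baseline::sum8u(p, n);      // baseline fallback
    }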
@@ -45,9 +45,15 @@
#ifndef OPENCV_CORE_CVDEF_H
#define OPENCV_CORE_CVDEF_H

#include "opencv2/core/version.hpp"

//! @addtogroup core_utils
//! @{

#ifdef OPENCV_INCLUDE_PORT_FILE // User-provided header file with custom platform configuration
#include OPENCV_INCLUDE_PORT_FILE
#endif

#if !defined CV_DOXYGEN && !defined CV_IGNORE_DEBUG_BUILD_GUARD
#if (defined(_MSC_VER) && (defined(DEBUG) || defined(_DEBUG))) || \
    (defined(_GLIBCXX_DEBUG) || defined(_GLIBCXX_DEBUG_PEDANTIC))

@@ -82,12 +88,24 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
#define __CV_VA_NUM_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N
#define __CV_VA_NUM_ARGS(...) __CV_VA_NUM_ARGS_HELPER(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)

#if defined __GNUC__
#ifdef CV_Func
// keep current value (through OpenCV port file)
#elif defined __GNUC__ || (defined (__cplusplus) && (__cplusplus >= 201103))
#define CV_Func __func__
#elif defined __clang__ && (__clang_major__ * 100 + __clang_minor__ >= 305)
#define CV_Func __func__
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)
#define CV_Func __func__
#elif defined _MSC_VER
#define CV_Func __FUNCTION__
#elif defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 600)
#define CV_Func __FUNCTION__
#elif defined __IBMCPP__ && __IBMCPP__ >= 500
#define CV_Func __FUNCTION__
#elif defined __BORLANDC__ && (__BORLANDC__ >= 0x550)
#define CV_Func __FUNC__
#else
#define CV_Func ""
#define CV_Func "<unknown>"
#endif

//! @cond IGNORED
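Reviewer note: CV_Func resolves to whatever function-name identifier the compiler provides. A quick usage sketch (assuming the header is already included):

    #include <cstdio>
    void probe() { std::printf("entered %s\n", CV_Func); } // prints "entered probe"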
@@ -118,9 +136,11 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
# if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302)
# define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
# else
namespace cv {
template <bool x> struct CV_StaticAssert_failed;
template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
template<int x> struct CV_StaticAssert_test {};
}
# define CV_StaticAssert(condition, reason)\
    typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
# endif
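Reviewer note: the template fallback works because CV_StaticAssert_failed<false> is declared but never defined, so sizeof() on it fails at compile time. Illustrative usage (not from this commit):

    CV_StaticAssert(sizeof(int) == 4, "int must be 32-bit");  // compiles
    // CV_StaticAssert(sizeof(int) == 8, "...");              // error reported at this line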
@@ -175,7 +195,12 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
#undef abs
#undef Complex

#if defined __cplusplus
#include <limits>
#else
#include <limits.h>
#endif

#include "opencv2/core/hal/interface.h"

#if defined __ICL

@@ -249,14 +274,28 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
#define CV_CPU_AVX_512PF 19
#define CV_CPU_AVX_512VBMI 20
#define CV_CPU_AVX_512VL 21
#define CV_CPU_AVX_512VBMI2 22
#define CV_CPU_AVX_512VNNI 23
#define CV_CPU_AVX_512BITALG 24
#define CV_CPU_AVX_512VPOPCNTDQ 25
#define CV_CPU_AVX_5124VNNIW 26
#define CV_CPU_AVX_5124FMAPS 27

#define CV_CPU_NEON 100

#define CV_CPU_MSA 150

#define CV_CPU_VSX 200
#define CV_CPU_VSX3 201

// CPU features groups
#define CV_CPU_AVX512_SKX 256
#define CV_CPU_AVX512_COMMON 257
#define CV_CPU_AVX512_KNL 258
#define CV_CPU_AVX512_KNM 259
#define CV_CPU_AVX512_CNL 260
#define CV_CPU_AVX512_CLX 261
#define CV_CPU_AVX512_ICL 262

// when adding to this list remember to update the following enum
#define CV_HARDWARE_MAX_FEATURE 512
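Reviewer note: these IDs feed cv::checkHardwareSupport(), which is what the dispatch macros above evaluate at run time. A quick sketch:

    #include <opencv2/core/utility.hpp>
    #include <cstdio>
    int main() {
        std::printf("AVX512_SKX: %d\n", (int)cv::checkHardwareSupport(CV_CPU_AVX512_SKX));
        std::printf("NEON:       %d\n", (int)cv::checkHardwareSupport(CV_CPU_NEON));
    }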
@@ -287,13 +326,27 @@ enum CpuFeatures {
    CPU_AVX_512PF        = 19,
    CPU_AVX_512VBMI      = 20,
    CPU_AVX_512VL        = 21,
    CPU_AVX_512VBMI2     = 22,
    CPU_AVX_512VNNI      = 23,
    CPU_AVX_512BITALG    = 24,
    CPU_AVX_512VPOPCNTDQ = 25,
    CPU_AVX_5124VNNIW    = 26,
    CPU_AVX_5124FMAPS    = 27,

    CPU_NEON             = 100,

    CPU_MSA              = 150,

    CPU_VSX              = 200,
    CPU_VSX3             = 201,

    CPU_AVX512_SKX       = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
    CPU_AVX512_COMMON    = 257, //!< Common instructions AVX-512F/CD for all CPUs that support AVX-512
    CPU_AVX512_KNL       = 258, //!< Knights Landing with AVX-512F/CD/ER/PF
    CPU_AVX512_KNM       = 259, //!< Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ
    CPU_AVX512_CNL       = 260, //!< Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI
    CPU_AVX512_CLX       = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/VNNI
    CPU_AVX512_ICL       = 262, //!< Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ

    CPU_MAX_FEATURE      = 512  // see CV_HARDWARE_MAX_FEATURE
};

@@ -301,6 +354,13 @@ enum CpuFeatures {

#include "cv_cpu_dispatch.h"

#if !defined(CV_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64))
// int*, int64* should be properly aligned pointers on ARMv7
#define CV_STRONG_ALIGNMENT 1
#endif
#if !defined(CV_STRONG_ALIGNMENT)
#define CV_STRONG_ALIGNMENT 0
#endif

/* fundamental constants */
#define CV_PI 3.1415926535897932384626433832795
@@ -340,17 +400,19 @@ typedef union Cv64suf
}
Cv64suf;

#ifndef OPENCV_ABI_COMPATIBILITY
#define OPENCV_ABI_COMPATIBILITY 300
#endif

#ifdef __OPENCV_BUILD
#  define DISABLE_OPENCV_24_COMPATIBILITY
#  define OPENCV_DISABLE_DEPRECATED_COMPATIBILITY
#endif

#ifdef CVAPI_EXPORTS
# if (defined _WIN32 || defined WINCE || defined __CYGWIN__)
#ifndef CV_EXPORTS
# if (defined _WIN32 || defined WINCE || defined __CYGWIN__) && defined(CVAPI_EXPORTS)
#   define CV_EXPORTS __declspec(dllexport)
# elif defined __GNUC__ && __GNUC__ >= 4
# elif defined __GNUC__ && __GNUC__ >= 4 && (defined(CVAPI_EXPORTS) || defined(__APPLE__))
#   define CV_EXPORTS __attribute__ ((visibility ("default")))
# endif
#endif
@@ -491,7 +553,11 @@ Cv64suf;
#  include <intrin.h>
#  define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
#else
CV_INLINE int CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
#ifdef OPENCV_FORCE_UNSAFE_XADD
CV_INLINE int CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
#else
#error "OpenCV: can't define safe CV_XADD macro for current platform (unsupported). Define CV_XADD macro through custom port header (see OPENCV_INCLUDE_PORT_FILE)"
#endif
#endif


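Reviewer note: CV_XADD is the atomic fetch-and-add behind cv::Mat reference counting; the fallback here is a plain (non-atomic) read-modify-write, which is why it now hides behind OPENCV_FORCE_UNSAFE_XADD. A usage sketch:

    // The last owner releases exactly once, because CV_XADD returns the
    // value *before* the decrement (atomically on supported platforms).
    int refcount = 1;
    if (CV_XADD(&refcount, -1) == 1)
        /* last owner: free the buffer */;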
@@ -560,6 +626,13 @@ Cv64suf;
#  endif
#endif

#ifdef CV_CXX_MOVE_SEMANTICS
#define CV_CXX_MOVE(x) std::move(x)
#else
#define CV_CXX_MOVE(x) (x)
#endif


/****************************************************************************************\
*                                   C++11 std::array                                    *
\****************************************************************************************/

@@ -598,6 +671,19 @@ Cv64suf;
# define CV_FINAL
#endif

/****************************************************************************************\
*                                    C++11 noexcept                                     *
\****************************************************************************************/

#ifndef CV_NOEXCEPT
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
#   define CV_NOEXCEPT noexcept
# endif
#endif
#ifndef CV_NOEXCEPT
# define CV_NOEXCEPT
#endif



// Integer types portability
@@ -683,7 +769,7 @@ protected:
    float16_t() {}
    explicit float16_t(float x)
    {
#if CV_AVX2
#if CV_FP16
        __m128 v = _mm_load_ss(&x);
        w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0));
#else

@@ -714,7 +800,7 @@ protected:

    operator float() const
    {
#if CV_AVX2
#if CV_FP16
        float f;
        _mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w)));
        return f;
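Reviewer note: the change keys the hardware path on CV_FP16 (F16C availability) instead of CV_AVX2, which was stricter than necessary. A stand-alone sketch of the same round-trip, assuming an x86 build with F16C enabled (e.g. -mf16c):

    #include <immintrin.h>
    #include <cstdio>
    int main() {
        float x = 3.14159f;
        // float -> IEEE 754 half bits -> float, via the same intrinsics as above
        unsigned short h = (unsigned short)_mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(x), 0));
        float y;
        _mm_store_ss(&y, _mm_cvtph_ps(_mm_cvtsi32_si128(h)));
        std::printf("%f -> 0x%04x -> %f\n", x, h, y);
    }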
@@ -1026,6 +1026,40 @@ static inline bool operator>= (const String& lhs, const String& rhs) { return lh
static inline bool operator>= (const char* lhs, const String& rhs) { return rhs.compare(lhs) <= 0; }
static inline bool operator>= (const String& lhs, const char* rhs) { return lhs.compare(rhs) >= 0; }


#ifndef OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS

//! @cond IGNORED
namespace details {
// std::tolower is int->int
static inline char char_tolower(char ch)
{
    return (char)std::tolower((int)ch);
}
// std::toupper is int->int
static inline char char_toupper(char ch)
{
    return (char)std::toupper((int)ch);
}
} // namespace details
//! @endcond

static inline std::string toLowerCase(const std::string& str)
{
    std::string result(str);
    std::transform(result.begin(), result.end(), result.begin(), details::char_tolower);
    return result;
}

static inline std::string toUpperCase(const std::string& str)
{
    std::string result(str);
    std::transform(result.begin(), result.end(), result.begin(), details::char_toupper);
    return result;
}

#endif // OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS

//! @} relates cv::String

} // cv
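Reviewer note: the char_tolower/char_toupper shims exist because std::transform needs a char->char callable while std::tolower/std::toupper are int->int. Usage sketch of the resulting helpers (assuming this hunk lands in opencv2/core/cvstd.hpp, where the String operators live):

    #include <opencv2/core/cvstd.hpp>
    #include <cassert>
    int main() {
        assert(cv::toLowerCase("OpenCV 4.x") == "opencv 4.x");  // ASCII-only semantics
        assert(cv::toUpperCase("OpenCV 4.x") == "OPENCV 4.X");
    }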
@@ -46,6 +46,7 @@

#include <complex>
#include <ostream>
#include <sstream>

//! @cond IGNORED

@@ -0,0 +1,71 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_CORE_ASYNC_PROMISE_HPP
#define OPENCV_CORE_ASYNC_PROMISE_HPP

#include "../async.hpp"

#include "exception_ptr.hpp"

namespace cv {

/** @addtogroup core_async
@{
*/


/** @brief Provides result of asynchronous operations

*/
class CV_EXPORTS AsyncPromise
{
public:
    ~AsyncPromise() CV_NOEXCEPT;
    AsyncPromise() CV_NOEXCEPT;
    explicit AsyncPromise(const AsyncPromise& o) CV_NOEXCEPT;
    AsyncPromise& operator=(const AsyncPromise& o) CV_NOEXCEPT;
    void release() CV_NOEXCEPT;

    /** Returns associated AsyncArray
    @note Can be called once
    */
    AsyncArray getArrayResult();

    /** Stores asynchronous result.
    @param[in] value result
    */
    void setValue(InputArray value);

    // TODO "move" setters

#if CV__EXCEPTION_PTR
    /** Stores exception.
    @param[in] exception exception to be raised in AsyncArray
    */
    void setException(std::exception_ptr exception);
#endif

    /** Stores exception.
    @param[in] exception exception to be raised in AsyncArray
    */
    void setException(const cv::Exception& exception);

#ifdef CV_CXX11
    explicit AsyncPromise(AsyncPromise&& o) { p = o.p; o.p = NULL; }
    AsyncPromise& operator=(AsyncPromise&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
#endif


    // PImpl
    typedef struct AsyncArray::Impl Impl; friend struct AsyncArray::Impl;
    inline void* _getImpl() const CV_NOEXCEPT { return p; }
protected:
    Impl* p;
};


//! @}
} // namespace
#endif // OPENCV_CORE_ASYNC_PROMISE_HPP
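Reviewer note: a minimal producer/consumer sketch of the promise API above (assuming the header installs as opencv2/core/detail/async_promise.hpp, as its relative includes suggest):

    #include <opencv2/core.hpp>
    #include <opencv2/core/detail/async_promise.hpp>
    #include <thread>
    int main() {
        cv::AsyncPromise p;
        cv::AsyncArray a = p.getArrayResult();       // may be called once
        std::thread worker([&p]{ p.setValue(cv::Mat::eye(3, 3, CV_32F)); });
        cv::Mat result;
        a.get(result);                               // blocks until setValue()
        worker.join();
    }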
@@ -0,0 +1,27 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
#define OPENCV_CORE_DETAILS_EXCEPTION_PTR_H

#ifndef CV__EXCEPTION_PTR
# if defined(__ANDROID__) && defined(ATOMIC_INT_LOCK_FREE) && ATOMIC_INT_LOCK_FREE < 2
#   define CV__EXCEPTION_PTR 0  // Not supported, details: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58938
# elif defined(CV_CXX11)
#   define CV__EXCEPTION_PTR 1
# elif defined(_MSC_VER)
#   define CV__EXCEPTION_PTR (_MSC_VER >= 1600)
# elif defined(__clang__)
#   define CV__EXCEPTION_PTR 0  // C++11 only (see above)
# elif defined(__GNUC__) && defined(__GXX_EXPERIMENTAL_CXX0X__)
#   define CV__EXCEPTION_PTR (__GXX_EXPERIMENTAL_CXX0X__ > 0)
# endif
#endif
#ifndef CV__EXCEPTION_PTR
# define CV__EXCEPTION_PTR 0
#elif CV__EXCEPTION_PTR
# include <exception>  // std::exception_ptr
#endif

#endif // OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
@@ -45,20 +45,142 @@
#ifndef OPENCV_CORE_EIGEN_HPP
#define OPENCV_CORE_EIGEN_HPP

#ifndef EIGEN_WORLD_VERSION
#error "Wrong usage of OpenCV's Eigen utility header. Include Eigen's headers first. See https://github.com/opencv/opencv/issues/17366"
#endif

#include "opencv2/core.hpp"

#if defined _MSC_VER && _MSC_VER >= 1200
#define NOMINMAX // fix https://github.com/opencv/opencv/issues/17548
#pragma warning( disable: 4714 ) //__forceinline is not inlined
#pragma warning( disable: 4127 ) //conditional expression is constant
#pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data
#endif

#if !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT)
#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 \
    && defined(CV_CXX11) && defined(CV_CXX_STD_ARRAY)
#include <unsupported/Eigen/CXX11/Tensor>
#define OPENCV_EIGEN_TENSOR_SUPPORT 1
#endif // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
#endif // !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT)

namespace cv
{

//! @addtogroup core_eigen
/** @addtogroup core_eigen
These functions are provided for OpenCV-Eigen interoperability. They convert `Mat`
objects to corresponding `Eigen::Matrix` objects and vice-versa. Consult the [Eigen
documentation](https://eigen.tuxfamily.org/dox/group__TutorialMatrixClass.html) for
information about the `Matrix` template type.

@note Using these functions requires the `Eigen/Dense` or similar header to be
included before this header.
*/
//! @{

#if defined(OPENCV_EIGEN_TENSOR_SUPPORT) || defined(CV_DOXYGEN)
/** @brief Converts an Eigen::Tensor to a cv::Mat.

The method converts an Eigen::Tensor with shape (H x W x C) to a cv::Mat where:
 H = number of rows
 W = number of columns
 C = number of channels

Usage:
\code
Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
// populate tensor with values
Mat a_mat;
eigen2cv(a_tensor, a_mat);
\endcode
*/
template <typename _Tp, int _layout> static inline
void eigen2cv( const Eigen::Tensor<_Tp, 3, _layout> &src, OutputArray dst )
{
    if( !(_layout & Eigen::RowMajorBit) )
    {
        const std::array<int, 3> shuffle{2, 1, 0};
        Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor = src.swap_layout().shuffle(shuffle);
        Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), row_major_tensor.data());
        _src.copyTo(dst);
    }
    else
    {
        Mat _src(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), (void *)src.data());
        _src.copyTo(dst);
    }
}

/** @brief Converts a cv::Mat to an Eigen::Tensor.

The method converts a cv::Mat to an Eigen Tensor with shape (H x W x C) where:
 H = number of rows
 W = number of columns
 C = number of channels

Usage:
\code
Mat a_mat(...);
// populate Mat with values
Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
cv2eigen(a_mat, a_tensor);
\endcode
*/
template <typename _Tp, int _layout> static inline
void cv2eigen( const Mat &src, Eigen::Tensor<_Tp, 3, _layout> &dst )
{
    if( !(_layout & Eigen::RowMajorBit) )
    {
        Eigen::Tensor<_Tp, 3, !_layout> row_major_tensor(src.rows, src.cols, src.channels());
        Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), row_major_tensor.data());
        if (src.type() == _dst.type())
            src.copyTo(_dst);
        else
            src.convertTo(_dst, _dst.type());
        const std::array<int, 3> shuffle{2, 1, 0};
        dst = row_major_tensor.swap_layout().shuffle(shuffle);
    }
    else
    {
        dst.resize(src.rows, src.cols, src.channels());
        Mat _dst(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), dst.data());
        if (src.type() == _dst.type())
            src.copyTo(_dst);
        else
            src.convertTo(_dst, _dst.type());
    }
}

/** @brief Maps cv::Mat data to an Eigen::TensorMap.

The method wraps an existing Mat data array with an Eigen TensorMap of shape (H x W x C) where:
 H = number of rows
 W = number of columns
 C = number of channels

Explicit instantiation of the return type is required.

@note Caller should be aware of the lifetime of the cv::Mat instance and take appropriate safety measures.
The cv::Mat instance will retain ownership of the data and the Eigen::TensorMap will lose access when the cv::Mat data is deallocated.

The example below initializes a cv::Mat and produces an Eigen::TensorMap:
\code
float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
Mat a_mat(2, 2, CV_32FC3, arr);
Eigen::TensorMap<Eigen::Tensor<float, 3, Eigen::RowMajor>> a_tensormap = cv2eigen_tensormap<float>(a_mat);
\endcode
*/
template <typename _Tp> static inline
Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>> cv2eigen_tensormap(InputArray src)
{
    Mat mat = src.getMat();
    CV_CheckTypeEQ(mat.type(), CV_MAKETYPE(traits::Type<_Tp>::value, mat.channels()), "");
    return Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>>((_Tp *)mat.data, mat.rows, mat.cols, mat.channels());
}
#endif // OPENCV_EIGEN_TENSOR_SUPPORT

template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst )
{
@@ -47,12 +47,6 @@

#include "opencv2/core/cvdef.h"

#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
    && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
#include <emmintrin.h>
#endif


//! @addtogroup core_utils
//! @{


@@ -70,11 +64,27 @@
# endif
#endif

#ifdef HAVE_TEGRA_OPTIMIZATION
# include "tegra_round.hpp"
#endif
#if defined(__CUDACC__)
// nothing, intrinsics/asm code is not supported
#else
#if ((defined _MSC_VER && defined _M_X64) \
    || (defined __GNUC__ && defined __x86_64__ && defined __SSE2__)) \
    && !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H)
#include <emmintrin.h>
#endif

#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ && !defined(__CUDACC__)
#if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \
    && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H)
#include <altivec.h>
#undef vector
#undef bool
#undef pixel
#endif

#if defined(CV_INLINE_ROUND_FLT)
// user-specified version
// CV_INLINE_ROUND_DBL should be defined too
#elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
// 1. general scheme
#define ARM_ROUND(_value, _asm_string) \
    int res; \
@@ -84,13 +94,102 @@
    return res
// 2. version for double
#ifdef __clang__
#define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
#define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
#else
#define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
#define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
#endif
// 3. version for float
#define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
#endif
#define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
#elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8
// P8 and newer machines can convert fp32/64 to int quickly.
#define CV_INLINE_ROUND_DBL(value) \
    int out; \
    double temp; \
    __asm__( "fctiw %[temp],%[in]\n\tmfvsrwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \
    return out;

// FP32 also works with FP64 routine above
#define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value)
#endif

#ifdef CV_INLINE_ISINF_FLT
// user-specified version
// CV_INLINE_ISINF_DBL should be defined too
#elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
#define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30);
#define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value)
#endif

#ifdef CV_INLINE_ISNAN_FLT
// user-specified version
// CV_INLINE_ISNAN_DBL should be defined too
#elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
#define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
#define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
#endif

#if !defined(OPENCV_USE_FASTMATH_BUILTINS) \
    && ( \
        defined(__x86_64__) || defined(__i686__) \
        || defined(__arm__) \
        || defined(__PPC64__) \
    )
/* Use builtin C math functions when available. Dedicated hardware is available to
   round and convert FP values. */
#define OPENCV_USE_FASTMATH_BUILTINS 1
#endif

/* Enable builtin math functions if possible, desired, and available.
   Note, not all math functions inline equally. E.g. lrint will not inline
   without the -fno-math-errno option. */
#if defined(CV_ICC)
// nothing
#elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS
#if defined(__clang__)
#define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
#if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan)
#define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
#endif
#if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan)
#define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value);
#endif
#if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf)
#define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
#endif
#if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf)
#define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value);
#endif
#elif defined(__GNUC__)
#define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS
#if !defined(CV_INLINE_ISNAN_DBL)
#define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
#endif
#if !defined(CV_INLINE_ISNAN_FLT)
#define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value);
#endif
#if !defined(CV_INLINE_ISINF_DBL)
#define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
#endif
#if !defined(CV_INLINE_ISINF_FLT)
#define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value);
#endif
#elif defined(_MSC_VER)
#if !defined(CV_INLINE_ISNAN_DBL)
#define CV_INLINE_ISNAN_DBL(value) return isnan(value);
#endif
#if !defined(CV_INLINE_ISNAN_FLT)
#define CV_INLINE_ISNAN_FLT(value) return isnan(value);
#endif
#if !defined(CV_INLINE_ISINF_DBL)
#define CV_INLINE_ISINF_DBL(value) return isinf(value);
#endif
#if !defined(CV_INLINE_ISINF_FLT)
#define CV_INLINE_ISINF_FLT(value) return isinf(value);
#endif
#endif
#endif

#endif // defined(__CUDACC__)

/** @brief Rounds floating-point number to the nearest integer


@@ -100,8 +199,11 @@
CV_INLINE int
cvRound( double value )
{
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
#if defined CV_INLINE_ROUND_DBL
    CV_INLINE_ROUND_DBL(value);
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
    && !defined(__CUDACC__)
    __m128d t = _mm_set_sd( value );
    return _mm_cvtsd_si32(t);
#elif defined _MSC_VER && defined _M_IX86

@@ -112,15 +214,8 @@ cvRound( double value )
        fistp t;
    }
    return t;
#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
    defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
    TEGRA_ROUND_DBL(value);
#elif defined CV_ICC || defined __GNUC__
# if defined ARM_ROUND_DBL
    ARM_ROUND_DBL(value);
# else
    return (int)lrint(value);
# endif
    return (int)(lrint(value));
#else
    /* it's ok if round does not comply with IEEE754 standard;
       the tests should allow +/-1 difference when the tested functions use round */
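Reviewer note: every remaining path (SSE2 cvtsd, x87 fistp, lrint) rounds in the current FP mode, i.e. round-half-to-even by default, which differs from the naive (int)(x + 0.5). Illustrative check, assuming the default FP environment:

    #include <opencv2/core/fast_math.hpp>
    #include <cstdio>
    int main() {
        std::printf("%d %d %d\n", cvRound(0.5), cvRound(1.5), cvRound(2.5)); // 0 2 2
    }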
@@ -138,8 +233,15 @@ cvRound( double value )
 */
CV_INLINE int cvFloor( double value )
{
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
    && ( \
        defined(__PPC64__) \
    )
    return __builtin_floor(value);
#else
    int i = (int)value;
    return i - (i > value);
#endif
}

/** @brief Rounds floating-point number to the nearest integer not smaller than the original.

@@ -151,8 +253,15 @@ CV_INLINE int cvFloor( double value )
 */
CV_INLINE int cvCeil( double value )
{
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
    && ( \
        defined(__PPC64__) \
    )
    return __builtin_ceil(value);
#else
    int i = (int)value;
    return i + (i < value);
#endif
}

/** @brief Determines if the argument is Not A Number.
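Reviewer note on the fallback branches: (int)value truncates toward zero, so for negative non-integers it overshoots by one; the comparison term fixes that up branchlessly. A stand-alone sketch of the same trick:

    #include <cstdio>
    static int floor_like(double value) { int i = (int)value; return i - (i > value); }
    static int ceil_like (double value) { int i = (int)value; return i + (i < value); }
    int main() {
        std::printf("%d %d\n", floor_like(-1.25), ceil_like(-1.25)); // -2 -1
    }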
@@ -163,10 +272,14 @@ CV_INLINE int cvCeil( double value )
otherwise. */
CV_INLINE int cvIsNaN( double value )
{
#if defined CV_INLINE_ISNAN_DBL
    CV_INLINE_ISNAN_DBL(value);
#else
    Cv64suf ieee754;
    ieee754.f = value;
    return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
           ((unsigned)ieee754.u != 0) > 0x7ff00000;
#endif
}

/** @brief Determines if the argument is Infinity.

@@ -177,10 +290,19 @@ CV_INLINE int cvIsNaN( double value )
and 0 otherwise. */
CV_INLINE int cvIsInf( double value )
{
#if defined CV_INLINE_ISINF_DBL
    CV_INLINE_ISINF_DBL(value);
#elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__PPC64__)
    Cv64suf ieee754;
    ieee754.f = value;
    return (ieee754.u & 0x7fffffff00000000) ==
           0x7ff0000000000000;
#else
    Cv64suf ieee754;
    ieee754.f = value;
    return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
           (unsigned)ieee754.u == 0;
#endif
}

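Reviewer note on the bit-level fallbacks: a double is NaN when, ignoring the sign bit, its payload compares greater than the infinity pattern (all-ones exponent, non-zero mantissa), and is infinite when it compares equal. A stand-alone sketch of the same test:

    #include <cmath>
    #include <cstdint>
    #include <cstring>
    #include <cstdio>
    static int is_nan_bits(double v) {
        uint64_t u; std::memcpy(&u, &v, sizeof u);   // type-pun safely
        return (u & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL;
    }
    int main() {
        std::printf("%d %d\n", is_nan_bits(std::nan("")), is_nan_bits(1.0)); // 1 0
    }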
#ifdef __cplusplus

@@ -188,8 +310,11 @@ CV_INLINE int cvIsInf( double value )
/** @overload */
CV_INLINE int cvRound(float value)
{
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
#if defined CV_INLINE_ROUND_FLT
    CV_INLINE_ROUND_FLT(value);
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
    && !defined(__CUDACC__)
    __m128 t = _mm_set_ss( value );
    return _mm_cvtss_si32(t);
#elif defined _MSC_VER && defined _M_IX86

@@ -200,15 +325,8 @@ CV_INLINE int cvRound(float value)
        fistp t;
    }
    return t;
#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
    defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
    TEGRA_ROUND_FLT(value);
#elif defined CV_ICC || defined __GNUC__
# if defined ARM_ROUND_FLT
    ARM_ROUND_FLT(value);
# else
    return (int)lrintf(value);
# endif
    return (int)(lrintf(value));
#else
    /* it's ok if round does not comply with IEEE754 standard;
       the tests should allow +/-1 difference when the tested functions use round */

@@ -225,8 +343,15 @@ CV_INLINE int cvRound( int value )
/** @overload */
CV_INLINE int cvFloor( float value )
{
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
    && ( \
        defined(__PPC64__) \
    )
    return __builtin_floorf(value);
#else
    int i = (int)value;
    return i - (i > value);
#endif
}

/** @overload */

@@ -238,8 +363,15 @@ CV_INLINE int cvFloor( int value )
/** @overload */
CV_INLINE int cvCeil( float value )
{
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
    && ( \
        defined(__PPC64__) \
    )
    return __builtin_ceilf(value);
#else
    int i = (int)value;
    return i + (i < value);
#endif
}

/** @overload */

@@ -251,17 +383,25 @@ CV_INLINE int cvCeil( int value )
/** @overload */
CV_INLINE int cvIsNaN( float value )
{
#if defined CV_INLINE_ISNAN_FLT
    CV_INLINE_ISNAN_FLT(value);
#else
    Cv32suf ieee754;
    ieee754.f = value;
    return (ieee754.u & 0x7fffffff) > 0x7f800000;
#endif
}

/** @overload */
CV_INLINE int cvIsInf( float value )
{
#if defined CV_INLINE_ISINF_FLT
    CV_INLINE_ISINF_FLT(value);
#else
    Cv32suf ieee754;
    ieee754.f = value;
    return (ieee754.u & 0x7fffffff) == 0x7f800000;
#endif
}

#endif // __cplusplus
@@ -0,0 +1,698 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef OPENCV_HAL_INTRIN_HPP
#define OPENCV_HAL_INTRIN_HPP

#include <cmath>
#include <float.h>
#include <stdlib.h>
#include "opencv2/core/cvdef.h"

#define OPENCV_HAL_ADD(a, b) ((a) + (b))
#define OPENCV_HAL_AND(a, b) ((a) & (b))
#define OPENCV_HAL_NOP(a) (a)
#define OPENCV_HAL_1ST(a, b) (a)

namespace {
inline unsigned int trailingZeros32(unsigned int value) {
#if defined(_MSC_VER)
#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64)
    unsigned long index = 0;
    _BitScanForward(&index, value);
    return (unsigned int)index;
#elif defined(__clang__)
    // clang-cl doesn't export _tzcnt_u32 for non-BMI systems
    return value ? __builtin_ctz(value) : 32;
#else
    return _tzcnt_u32(value);
#endif
#elif defined(__GNUC__) || defined(__GNUG__)
    return __builtin_ctz(value);
#elif defined(__ICC) || defined(__INTEL_COMPILER)
    return _bit_scan_forward(value);
#elif defined(__clang__)
    // clang defines __GNUC__, so this branch is effectively unreachable;
    // use the builtin rather than the non-compilable llvm.cttz.i32 spelling
    return __builtin_ctz(value);
#else
    static const int MultiplyDeBruijnBitPosition[32] = {
        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
    return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
#endif
}
}
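Reviewer note on the portable De Bruijn fallback: (value & -value) isolates the lowest set bit; multiplying by the De Bruijn constant 0x077CB531 produces a unique 5-bit pattern in the top bits, which indexes the table. A stand-alone sketch (defined for non-zero inputs, like the original):

    #include <cstdio>
    static const int tab[32] = {
        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
    static unsigned tz32(unsigned v) { return tab[((v & -v) * 0x077CB531U) >> 27]; }
    int main() { std::printf("%u %u\n", tz32(8), tz32(1)); } // 3 0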

// unlike the HAL API, which lives in cv::hal,
// we put the intrinsics into the cv namespace itself to make them
// easier to reach from within OpenCV code
namespace cv {

namespace hal {

enum StoreMode
{
    STORE_UNALIGNED = 0,
    STORE_ALIGNED = 1,
    STORE_ALIGNED_NOCACHE = 2
};

}

// TODO FIXIT: Don't use "God" traits. Split on separate cases.
template<typename _Tp> struct V_TypeTraits
{
};

#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_) \
template<> struct V_TypeTraits<type> \
{ \
    typedef type value_type; \
    typedef int_type_ int_type; \
    typedef abs_type_ abs_type; \
    typedef uint_type_ uint_type; \
    typedef w_type_ w_type; \
    typedef q_type_ q_type; \
    typedef sum_type_ sum_type; \
\
    static inline int_type reinterpret_int(type x) \
    { \
        union { type l; int_type i; } v; \
        v.l = x; \
        return v.i; \
    } \
\
    static inline type reinterpret_from_int(int_type x) \
    { \
        union { type l; int_type i; } v; \
        v.i = x; \
        return v.l; \
    } \
}

#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_) \
template<> struct V_TypeTraits<type> \
{ \
    typedef type value_type; \
    typedef int_type_ int_type; \
    typedef abs_type_ abs_type; \
    typedef uint_type_ uint_type; \
    typedef w_type_ w_type; \
    typedef sum_type_ sum_type; \
\
    static inline int_type reinterpret_int(type x) \
    { \
        union { type l; int_type i; } v; \
        v.l = x; \
        return v.i; \
    } \
\
    static inline type reinterpret_from_int(int_type x) \
    { \
        union { type l; int_type i; } v; \
        v.i = x; \
        return v.l; \
    } \
}

CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int);
CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double);

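Reviewer note: each specialization exposes the companion types (widened w_type, unsigned counterpart, accumulator sum_type) plus a bitwise reinterpret between the lane type and its integer image. A usage sketch (assumes the header is included as part of an OpenCV build):

    #include <opencv2/core/hal/intrin.hpp>
    #include <cstdio>
    int main() {
        int bits = cv::V_TypeTraits<float>::reinterpret_int(1.0f);
        std::printf("0x%08x\n", bits);  // 0x3f800000, the IEEE 754 image of 1.0f
    }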
#ifndef CV_DOXYGEN

#ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE
#ifdef CV_FORCE_SIMD128_CPP
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#elif defined(CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#else
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#endif
#endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
#endif
}

#ifdef CV_DOXYGEN
#  undef CV_AVX2
#  undef CV_SSE2
#  undef CV_NEON
#  undef CV_VSX
#  undef CV_FP16
#  undef CV_MSA
#endif

#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD) && !defined(CV_FORCE_SIMD128_CPP)
#define CV__SIMD_FORWARD 128
#include "opencv2/core/hal/intrin_forward.hpp"
#endif

#if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP)

#include "opencv2/core/hal/intrin_sse_em.hpp"
#include "opencv2/core/hal/intrin_sse.hpp"

#elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP)

#include "opencv2/core/hal/intrin_neon.hpp"

#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP)

#include "opencv2/core/hal/intrin_vsx.hpp"

#elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP)

#include "opencv2/core/hal/intrin_msa.hpp"

#elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_wasm.hpp"

#else

#include "opencv2/core/hal/intrin_cpp.hpp"

#endif

// AVX2 can be used together with SSE2, so
// we define those two sets of intrinsics at once.
// Most of the intrinsics do not conflict (the proper overloaded variant is
// resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2),
// but some of the AVX2 intrinsics get the v256_ prefix instead of v_, e.g. v256_load() vs v_load().
// Correspondingly, the wide intrinsics (which are mapped to the "widest"
// available instruction set) get the vx_ prefix
// (and are then mapped to the v256_ counterparts), e.g. vx_load() => v256_load().
#if CV_AVX2

#define CV__SIMD_FORWARD 256
#include "opencv2/core/hal/intrin_forward.hpp"
#include "opencv2/core/hal/intrin_avx.hpp"

#endif

// AVX512 can be used together with SSE2 and AVX2, so
// we define those sets of intrinsics at once.
// Some of the AVX512 intrinsics get the v512_ prefix instead of v_, e.g. v512_load() vs v_load().
// The wide intrinsics are mapped to the v512_ counterparts in this case (e.g. vx_load() => v512_load()).
#if CV_AVX512_SKX

#define CV__SIMD_FORWARD 512
#include "opencv2/core/hal/intrin_forward.hpp"
#include "opencv2/core/hal/intrin_avx512.hpp"

#endif

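Reviewer note: a minimal sketch of the prefix scheme just described, assuming an OpenCV 4.x build with CV_SIMD enabled (v_float32, vx_load and friends are defined further down in this header):

    #include <opencv2/core/hal/intrin.hpp>
    using namespace cv;
    // Sum a float array using the widest register available at compile time.
    float sum(const float* data, int n) {
        v_float32 s = vx_setzero_f32();              // widest vector of zeros
        int i = 0;
        for (; i + v_float32::nlanes <= n; i += v_float32::nlanes)
            s = s + vx_load(data + i);               // vx_load -> v_load/v256_load/v512_load
        float r = v_reduce_sum(s);                   // horizontal add
        for (; i < n; ++i) r += data[i];             // scalar tail
        return r;
    }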
//! @cond IGNORED

namespace cv {

#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif

#ifndef CV_SIMD128
#define CV_SIMD128 0
#endif

#ifndef CV_SIMD128_CPP
#define CV_SIMD128_CPP 0
#endif

#ifndef CV_SIMD128_64F
#define CV_SIMD128_64F 0
#endif

#ifndef CV_SIMD256
#define CV_SIMD256 0
#endif

#ifndef CV_SIMD256_64F
#define CV_SIMD256_64F 0
#endif

#ifndef CV_SIMD512
#define CV_SIMD512 0
#endif

#ifndef CV_SIMD512_64F
#define CV_SIMD512_64F 0
#endif

#ifndef CV_SIMD128_FP16
#define CV_SIMD128_FP16 0
#endif

#ifndef CV_SIMD256_FP16
#define CV_SIMD256_FP16 0
#endif

#ifndef CV_SIMD512_FP16
#define CV_SIMD512_FP16 0
#endif

//==================================================================================================

template<typename _Tp> struct V_RegTraits
{
};

#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \
template<> struct V_RegTraits<_reg> \
{ \
    typedef _reg reg; \
    typedef _u_reg u_reg; \
    typedef _w_reg w_reg; \
    typedef _q_reg q_reg; \
    typedef _int_reg int_reg; \
    typedef _round_reg round_reg; \
}

#if CV_SIMD128 || CV_SIMD128_CPP
CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void);
CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void);
CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void);
CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void);
CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void);
CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void);
#if CV_SIMD128_64F || CV_SIMD128_CPP
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4);
#else
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4);
#endif
CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void);
CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void);
#if CV_SIMD128_64F
CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4);
#endif
#endif

#if CV_SIMD256
CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void);
CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void);
CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void);
CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void);
CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void);
CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void);
CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8);
CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void);
CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void);
CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8);
#endif

#if CV_SIMD512
CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void);
CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, v_int16x32, v_int32x16, v_int8x64, void);
CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void);
CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void);
CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void);
CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void);
CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16);
CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void);
CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void);
CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16);
#endif
//! @endcond

#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
|
||||
#define CV__SIMD_NAMESPACE simd512
|
||||
namespace CV__SIMD_NAMESPACE {
|
||||
#define CV_SIMD 1
|
||||
#define CV_SIMD_64F CV_SIMD512_64F
|
||||
#define CV_SIMD_FP16 CV_SIMD512_FP16
|
||||
#define CV_SIMD_WIDTH 64
|
||||
//! @addtogroup core_hal_intrin
|
||||
//! @{
|
||||
//! @brief Maximum available vector register capacity 8-bit unsigned integer values
|
||||
typedef v_uint8x64 v_uint8;
|
||||
//! @brief Maximum available vector register capacity 8-bit signed integer values
|
||||
typedef v_int8x64 v_int8;
|
||||
//! @brief Maximum available vector register capacity 16-bit unsigned integer values
|
||||
typedef v_uint16x32 v_uint16;
|
||||
//! @brief Maximum available vector register capacity 16-bit signed integer values
|
||||
typedef v_int16x32 v_int16;
|
||||
//! @brief Maximum available vector register capacity 32-bit unsigned integer values
|
||||
typedef v_uint32x16 v_uint32;
|
||||
//! @brief Maximum available vector register capacity 32-bit signed integer values
|
||||
typedef v_int32x16 v_int32;
|
||||
//! @brief Maximum available vector register capacity 64-bit unsigned integer values
|
||||
typedef v_uint64x8 v_uint64;
|
||||
//! @brief Maximum available vector register capacity 64-bit signed integer values
|
||||
typedef v_int64x8 v_int64;
|
||||
//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
|
||||
typedef v_float32x16 v_float32;
|
||||
#if CV_SIMD512_64F
|
||||
//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
|
||||
typedef v_float64x8 v_float64;
|
||||
#endif
|
||||
//! @}
|
||||
|
||||
#define VXPREFIX(func) v512##func
|
||||
} // namespace
|
||||
using namespace CV__SIMD_NAMESPACE;
|
||||
#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
|
||||
#define CV__SIMD_NAMESPACE simd256
|
||||
namespace CV__SIMD_NAMESPACE {
|
||||
#define CV_SIMD 1
|
||||
#define CV_SIMD_64F CV_SIMD256_64F
|
||||
#define CV_SIMD_FP16 CV_SIMD256_FP16
|
||||
#define CV_SIMD_WIDTH 32
|
||||
//! @addtogroup core_hal_intrin
|
||||
//! @{
|
||||
//! @brief Maximum available vector register capacity 8-bit unsigned integer values
|
||||
typedef v_uint8x32 v_uint8;
|
||||
//! @brief Maximum available vector register capacity 8-bit signed integer values
|
||||
typedef v_int8x32 v_int8;
|
||||
//! @brief Maximum available vector register capacity 16-bit unsigned integer values
|
||||
typedef v_uint16x16 v_uint16;
|
||||
//! @brief Maximum available vector register capacity 16-bit signed integer values
|
||||
typedef v_int16x16 v_int16;
|
||||
//! @brief Maximum available vector register capacity 32-bit unsigned integer values
|
||||
typedef v_uint32x8 v_uint32;
|
||||
//! @brief Maximum available vector register capacity 32-bit signed integer values
|
||||
typedef v_int32x8 v_int32;
|
||||
//! @brief Maximum available vector register capacity 64-bit unsigned integer values
|
||||
typedef v_uint64x4 v_uint64;
|
||||
//! @brief Maximum available vector register capacity 64-bit signed integer values
|
||||
typedef v_int64x4 v_int64;
|
||||
//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
|
||||
typedef v_float32x8 v_float32;
|
||||
#if CV_SIMD256_64F
|
||||
//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
|
||||
typedef v_float64x4 v_float64;
|
||||
#endif
|
||||
//! @}
|
||||
|
||||
#define VXPREFIX(func) v256##func
|
||||
} // namespace
|
||||
using namespace CV__SIMD_NAMESPACE;
|
||||
#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
|
||||
#if defined CV_SIMD128_CPP
|
||||
#define CV__SIMD_NAMESPACE simd128_cpp
|
||||
#else
|
||||
#define CV__SIMD_NAMESPACE simd128
|
||||
#endif
|
||||
namespace CV__SIMD_NAMESPACE {
|
||||
#define CV_SIMD CV_SIMD128
|
||||
#define CV_SIMD_64F CV_SIMD128_64F
|
||||
#define CV_SIMD_WIDTH 16
|
||||
//! @addtogroup core_hal_intrin
|
||||
//! @{
|
||||
//! @brief Maximum available vector register capacity 8-bit unsigned integer values
|
||||
typedef v_uint8x16 v_uint8;
|
||||
//! @brief Maximum available vector register capacity 8-bit signed integer values
|
||||
typedef v_int8x16 v_int8;
|
||||
//! @brief Maximum available vector register capacity 16-bit unsigned integer values
|
||||
typedef v_uint16x8 v_uint16;
|
||||
//! @brief Maximum available vector register capacity 16-bit signed integer values
|
||||
typedef v_int16x8 v_int16;
|
||||
//! @brief Maximum available vector register capacity 32-bit unsigned integer values
|
||||
typedef v_uint32x4 v_uint32;
|
||||
//! @brief Maximum available vector register capacity 32-bit signed integer values
|
||||
typedef v_int32x4 v_int32;
|
||||
//! @brief Maximum available vector register capacity 64-bit unsigned integer values
|
||||
typedef v_uint64x2 v_uint64;
|
||||
//! @brief Maximum available vector register capacity 64-bit signed integer values
|
||||
typedef v_int64x2 v_int64;
|
||||
//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
|
||||
typedef v_float32x4 v_float32;
|
||||
#if CV_SIMD128_64F
|
||||
//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
|
||||
typedef v_float64x2 v_float64;
|
||||
#endif
|
||||
//! @}
|
||||
|
||||
#define VXPREFIX(func) v##func
|
||||
} // namespace
|
||||
using namespace CV__SIMD_NAMESPACE;
|
||||
#endif
|
||||
|
||||
namespace CV__SIMD_NAMESPACE {
|
||||
//! @addtogroup core_hal_intrin
|
||||
//! @{
|
||||
//! @name Wide init with value
|
||||
//! @{
|
||||
//! @brief Create maximum available capacity vector with elements set to a specific value
|
||||
inline v_uint8 vx_setall_u8(uchar v) { return VXPREFIX(_setall_u8)(v); }
|
||||
inline v_int8 vx_setall_s8(schar v) { return VXPREFIX(_setall_s8)(v); }
|
||||
inline v_uint16 vx_setall_u16(ushort v) { return VXPREFIX(_setall_u16)(v); }
|
||||
inline v_int16 vx_setall_s16(short v) { return VXPREFIX(_setall_s16)(v); }
|
||||
inline v_int32 vx_setall_s32(int v) { return VXPREFIX(_setall_s32)(v); }
|
||||
inline v_uint32 vx_setall_u32(unsigned v) { return VXPREFIX(_setall_u32)(v); }
|
||||
inline v_float32 vx_setall_f32(float v) { return VXPREFIX(_setall_f32)(v); }
|
||||
inline v_int64 vx_setall_s64(int64 v) { return VXPREFIX(_setall_s64)(v); }
|
||||
inline v_uint64 vx_setall_u64(uint64 v) { return VXPREFIX(_setall_u64)(v); }
|
||||
#if CV_SIMD_64F
|
||||
inline v_float64 vx_setall_f64(double v) { return VXPREFIX(_setall_f64)(v); }
|
||||
#endif
|
||||
//! @}

//! @name Wide init with zero
//! @{
//! @brief Create maximum available capacity vector with elements set to zero
inline v_uint8 vx_setzero_u8() { return VXPREFIX(_setzero_u8)(); }
inline v_int8 vx_setzero_s8() { return VXPREFIX(_setzero_s8)(); }
inline v_uint16 vx_setzero_u16() { return VXPREFIX(_setzero_u16)(); }
inline v_int16 vx_setzero_s16() { return VXPREFIX(_setzero_s16)(); }
inline v_int32 vx_setzero_s32() { return VXPREFIX(_setzero_s32)(); }
inline v_uint32 vx_setzero_u32() { return VXPREFIX(_setzero_u32)(); }
inline v_float32 vx_setzero_f32() { return VXPREFIX(_setzero_f32)(); }
inline v_int64 vx_setzero_s64() { return VXPREFIX(_setzero_s64)(); }
inline v_uint64 vx_setzero_u64() { return VXPREFIX(_setzero_u64)(); }
#if CV_SIMD_64F
inline v_float64 vx_setzero_f64() { return VXPREFIX(_setzero_f64)(); }
#endif
//! @}

//! @name Wide load from memory
//! @{
//! @brief Load maximum available capacity register contents from memory
inline v_uint8 vx_load(const uchar * ptr) { return VXPREFIX(_load)(ptr); }
inline v_int8 vx_load(const schar * ptr) { return VXPREFIX(_load)(ptr); }
inline v_uint16 vx_load(const ushort * ptr) { return VXPREFIX(_load)(ptr); }
inline v_int16 vx_load(const short * ptr) { return VXPREFIX(_load)(ptr); }
inline v_int32 vx_load(const int * ptr) { return VXPREFIX(_load)(ptr); }
inline v_uint32 vx_load(const unsigned * ptr) { return VXPREFIX(_load)(ptr); }
inline v_float32 vx_load(const float * ptr) { return VXPREFIX(_load)(ptr); }
inline v_int64 vx_load(const int64 * ptr) { return VXPREFIX(_load)(ptr); }
inline v_uint64 vx_load(const uint64 * ptr) { return VXPREFIX(_load)(ptr); }
#if CV_SIMD_64F
inline v_float64 vx_load(const double * ptr) { return VXPREFIX(_load)(ptr); }
#endif
//! @}

//! @name Wide load from memory (aligned)
//! @{
//! @brief Load maximum available capacity register contents from memory (aligned)
inline v_uint8 vx_load_aligned(const uchar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_int8 vx_load_aligned(const schar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_uint16 vx_load_aligned(const ushort * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_int16 vx_load_aligned(const short * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_int32 vx_load_aligned(const int * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_uint32 vx_load_aligned(const unsigned * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_float32 vx_load_aligned(const float * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_int64 vx_load_aligned(const int64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_uint64 vx_load_aligned(const uint64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
#if CV_SIMD_64F
inline v_float64 vx_load_aligned(const double * ptr) { return VXPREFIX(_load_aligned)(ptr); }
#endif
//! @}
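
// --- Illustrative addition (not part of the original header): a minimal
// sketch of how the width-agnostic vx_* wrappers above are used. v_float32
// aliases the widest available register type, so the same loop compiles
// against SSE, AVX2, etc.; nlanes is the lane count of the wide type.
inline void vx_scale_demo(float* data, int n, float k)
{
    v_float32 vk = vx_setall_f32(k);
    int i = 0;
    for (; i + v_float32::nlanes <= n; i += v_float32::nlanes)
        v_store(data + i, vx_load(data + i) * vk); // wide multiply and store
    for (; i < n; ++i) // scalar tail for leftover elements
        data[i] *= k;
}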

//! @name Wide load lower half from memory
//! @{
//! @brief Load lower half of maximum available capacity register from memory
inline v_uint8 vx_load_low(const uchar * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_int8 vx_load_low(const schar * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_uint16 vx_load_low(const ushort * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_int16 vx_load_low(const short * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_int32 vx_load_low(const int * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_uint32 vx_load_low(const unsigned * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_float32 vx_load_low(const float * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_int64 vx_load_low(const int64 * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_uint64 vx_load_low(const uint64 * ptr) { return VXPREFIX(_load_low)(ptr); }
#if CV_SIMD_64F
inline v_float64 vx_load_low(const double * ptr) { return VXPREFIX(_load_low)(ptr); }
#endif
//! @}

//! @name Wide load halves from memory
//! @{
//! @brief Load maximum available capacity register contents from two memory blocks
inline v_uint8 vx_load_halves(const uchar * ptr0, const uchar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_int8 vx_load_halves(const schar * ptr0, const schar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_uint16 vx_load_halves(const ushort * ptr0, const ushort * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_int16 vx_load_halves(const short * ptr0, const short * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_int32 vx_load_halves(const int * ptr0, const int * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_uint32 vx_load_halves(const unsigned * ptr0, const unsigned * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_float32 vx_load_halves(const float * ptr0, const float * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_int64 vx_load_halves(const int64 * ptr0, const int64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_uint64 vx_load_halves(const uint64 * ptr0, const uint64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
#if CV_SIMD_64F
inline v_float64 vx_load_halves(const double * ptr0, const double * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
#endif
//! @}

//! @name Wide LUT of elements
//! @{
//! @brief Load maximum available capacity register contents with array elements by provided indexes
inline v_uint8 vx_lut(const uchar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_int8 vx_lut(const schar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_uint16 vx_lut(const ushort * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_int16 vx_lut(const short* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_int32 vx_lut(const int* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_uint32 vx_lut(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_float32 vx_lut(const float* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_int64 vx_lut(const int64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_uint64 vx_lut(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
#if CV_SIMD_64F
inline v_float64 vx_lut(const double* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
#endif
//! @}
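
// Illustrative addition (assumption, not in the original header): vx_lut is a
// gather-by-index load, e.g. applying a permutation table to float data.
inline v_float32 vx_lut_demo(const float* tab, const int* idx)
{
    // reads tab[idx[0]], tab[idx[1]], ... into one wide register
    return vx_lut(tab, idx);
}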

//! @name Wide LUT of element pairs
//! @{
//! @brief Load maximum available capacity register contents with array element pairs by provided indexes
inline v_uint8 vx_lut_pairs(const uchar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_int8 vx_lut_pairs(const schar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_uint16 vx_lut_pairs(const ushort * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_int16 vx_lut_pairs(const short* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_int32 vx_lut_pairs(const int* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_uint32 vx_lut_pairs(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_float32 vx_lut_pairs(const float* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_int64 vx_lut_pairs(const int64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_uint64 vx_lut_pairs(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
#if CV_SIMD_64F
inline v_float64 vx_lut_pairs(const double* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
#endif
//! @}

//! @name Wide LUT of element quads
//! @{
//! @brief Load maximum available capacity register contents with array element quads by provided indexes
inline v_uint8 vx_lut_quads(const uchar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_int8 vx_lut_quads(const schar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_uint16 vx_lut_quads(const ushort* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_int16 vx_lut_quads(const short* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_int32 vx_lut_quads(const int* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_uint32 vx_lut_quads(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_float32 vx_lut_quads(const float* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
//! @}

//! @name Wide load with double expansion
//! @{
//! @brief Load maximum available capacity register contents from memory with double expand
inline v_uint16 vx_load_expand(const uchar * ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_int16 vx_load_expand(const schar * ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_uint32 vx_load_expand(const ushort * ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_float32 vx_load_expand(const float16_t * ptr) { return VXPREFIX(_load_expand)(ptr); }
//! @}
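
// Illustrative addition (assumption, not in the original header):
// vx_load_expand is a widening load (e.g. 8-bit -> 16-bit), handy for
// accumulating bytes without 8-bit wraparound. The 16-bit lanes can still
// wrap for long rows, so this sketch assumes short rows.
inline unsigned vx_sum_bytes_demo(const uchar* src, int n)
{
    v_uint16 acc = vx_setzero_u16();
    int i = 0;
    for (; i + v_uint16::nlanes <= n; i += v_uint16::nlanes)
        acc += vx_load_expand(src + i); // widen to 16 bits, then add
    unsigned s = v_reduce_sum(acc);
    for (; i < n; ++i) // scalar tail
        s += src[i];
    return s;
}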

//! @name Wide load with quad expansion
//! @{
//! @brief Load maximum available capacity register contents from memory with quad expand
inline v_uint32 vx_load_expand_q(const uchar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
//! @}

/** @brief SIMD processing state cleanup call */
inline void vx_cleanup() { VXPREFIX(_cleanup)(); }

//! @cond IGNORED

// backward compatibility
template<typename _Tp, typename _Tvec> static inline
void vx_store(_Tp* dst, const _Tvec& v) { return v_store(dst, v); }
// backward compatibility
template<typename _Tp, typename _Tvec> static inline
void vx_store_aligned(_Tp* dst, const _Tvec& v) { return v_store_aligned(dst, v); }

//! @endcond

//! @}
#undef VXPREFIX
} // namespace

//! @cond IGNORED
#ifndef CV_SIMD_64F
#define CV_SIMD_64F 0
#endif

#ifndef CV_SIMD_FP16
#define CV_SIMD_FP16 0 //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types
#endif

#ifndef CV_SIMD
#define CV_SIMD 0
#endif

#include "simd_utils.impl.hpp"

#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif

} // cv::

//! @endcond

#endif

@@ -90,6 +90,50 @@ inline __m256i _v256_packs_epu32(const __m256i& a, const __m256i& b)
    return _mm256_packus_epi32(am, bm);
}

template<int i>
inline int _v256_extract_epi8(const __m256i& a)
{
#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/))
    return _mm256_extract_epi8(a, i);
#else
    __m128i b = _mm256_extractf128_si256(a, ((i) >> 4));
    return _mm_extract_epi8(b, i & 15); // SSE4.1
#endif
}

template<int i>
inline int _v256_extract_epi16(const __m256i& a)
{
#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/))
    return _mm256_extract_epi16(a, i);
#else
    __m128i b = _mm256_extractf128_si256(a, ((i) >> 3));
    return _mm_extract_epi16(b, i & 7); // SSE2
#endif
}

template<int i>
inline int _v256_extract_epi32(const __m256i& a)
{
#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/))
    return _mm256_extract_epi32(a, i);
#else
    __m128i b = _mm256_extractf128_si256(a, ((i) >> 2));
    return _mm_extract_epi32(b, i & 3); // SSE4.1
#endif
}

template<int i>
inline int64 _v256_extract_epi64(const __m256i& a)
{
#if defined(CV__SIMD_HAVE_mm256_extract_epi8) || (CV_AVX2 && (!defined(_MSC_VER) || _MSC_VER >= 1910/*MSVS 2017*/))
    return _mm256_extract_epi64(a, i);
#else
    __m128i b = _mm256_extractf128_si256(a, ((i) >> 1));
    return _mm_extract_epi64(b, i & 1); // SSE4.1
#endif
}
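
// Editor's note (illustration, not in the original source): in the fallback
// paths above, (i >> k) selects the 128-bit half (k = 4/3/2/1 for
// 8/16/32/64-bit lanes) and (i & mask) re-indexes within that half, so e.g.
// _v256_extract_epi32<6>(a) reduces to _mm_extract_epi32(high_half, 2).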

///////// Types ////////////

struct v_uint8x32

@@ -115,7 +159,9 @@ struct v_uint8x32
        (char)v22, (char)v23, (char)v24, (char)v25, (char)v26, (char)v27,
        (char)v28, (char)v29, (char)v30, (char)v31);
    }
    v_uint8x32() : val(_mm256_setzero_si256()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_uint8x32() {}

    uchar get0() const { return (uchar)_v_cvtsi256_si32(val); }
};

@@ -139,7 +185,9 @@ struct v_int8x32
        v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
        v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31);
    }
    v_int8x32() : val(_mm256_setzero_si256()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_int8x32() {}

    schar get0() const { return (schar)_v_cvtsi256_si32(val); }
};

@@ -159,7 +207,9 @@ struct v_uint16x16
        (short)v4, (short)v5, (short)v6, (short)v7, (short)v8, (short)v9,
        (short)v10, (short)v11, (short)v12, (short)v13, (short)v14, (short)v15);
    }
    v_uint16x16() : val(_mm256_setzero_si256()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_uint16x16() {}

    ushort get0() const { return (ushort)_v_cvtsi256_si32(val); }
};

@@ -178,7 +228,9 @@ struct v_int16x16
        val = _mm256_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7,
                                v8, v9, v10, v11, v12, v13, v14, v15);
    }
    v_int16x16() : val(_mm256_setzero_si256()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_int16x16() {}

    short get0() const { return (short)_v_cvtsi256_si32(val); }
};

@@ -195,7 +247,9 @@ struct v_uint32x8
        val = _mm256_setr_epi32((unsigned)v0, (unsigned)v1, (unsigned)v2,
            (unsigned)v3, (unsigned)v4, (unsigned)v5, (unsigned)v6, (unsigned)v7);
    }
    v_uint32x8() : val(_mm256_setzero_si256()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_uint32x8() {}

    unsigned get0() const { return (unsigned)_v_cvtsi256_si32(val); }
};

@@ -211,7 +265,9 @@ struct v_int32x8
    {
        val = _mm256_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7);
    }
    v_int32x8() : val(_mm256_setzero_si256()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_int32x8() {}

    int get0() const { return _v_cvtsi256_si32(val); }
};

@@ -227,7 +283,9 @@ struct v_float32x8
    {
        val = _mm256_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7);
    }
    v_float32x8() : val(_mm256_setzero_ps()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_float32x8() {}

    float get0() const { return _mm_cvtss_f32(_mm256_castps256_ps128(val)); }
};

@@ -240,7 +298,9 @@ struct v_uint64x4
    explicit v_uint64x4(__m256i v) : val(v) {}
    v_uint64x4(uint64 v0, uint64 v1, uint64 v2, uint64 v3)
    { val = _mm256_setr_epi64x((int64)v0, (int64)v1, (int64)v2, (int64)v3); }
    v_uint64x4() : val(_mm256_setzero_si256()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_uint64x4() {}

    uint64 get0() const
    {
    #if defined __x86_64__ || defined _M_X64

@@ -262,7 +322,8 @@ struct v_int64x4
    explicit v_int64x4(__m256i v) : val(v) {}
    v_int64x4(int64 v0, int64 v1, int64 v2, int64 v3)
    { val = _mm256_setr_epi64x(v0, v1, v2, v3); }
    v_int64x4() : val(_mm256_setzero_si256()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_int64x4() {}

    int64 get0() const
    {

@@ -285,7 +346,9 @@ struct v_float64x4
    explicit v_float64x4(__m256d v) : val(v) {}
    v_float64x4(double v0, double v1, double v2, double v3)
    { val = _mm256_setr_pd(v0, v1, v2, v3); }
    v_float64x4() : val(_mm256_setzero_pd()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_float64x4() {}

    double get0() const { return _mm_cvtsd_f64(_mm256_castpd256_pd128(val)); }
};

@@ -431,19 +494,6 @@ inline v_float64x4 v_reinterpret_as_f64(const v_float64x4& a)
inline v_float64x4 v_reinterpret_as_f64(const v_float32x8& a)
{ return v_float64x4(_mm256_castps_pd(a.val)); }

#if CV_FP16
inline v_float32x8 v256_load_fp16_f32(const short* ptr)
{
    return v_float32x8(_mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
}

inline void v_store_fp16(short* ptr, const v_float32x8& a)
{
    __m128i fp16_value = _mm256_cvtps_ph(a.val, 0);
    _mm_store_si128((__m128i*)ptr, fp16_value);
}
#endif

/* Recombine */
/*#define OPENCV_HAL_IMPL_AVX_COMBINE(_Tpvec, perm) \
    inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \

@@ -538,7 +588,7 @@ inline v_int64x4 v256_blend(const v_int64x4& a, const v_int64x4& b)
{ return v_int64x4(v256_blend<m>(v_uint64x4(a.val), v_uint64x4(b.val)).val); }

// shuffle
// todo: emluate 64bit
// todo: emulate 64bit
#define OPENCV_HAL_IMPL_AVX_SHUFFLE(_Tpvec, intrin) \
template<int m> \
inline _Tpvec v256_shuffle(const _Tpvec& a) \

@@ -1025,9 +1075,85 @@ OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float32x8, _mm256_castsi256_ps
OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, v_float64x4, _mm256_castsi256_pd)
OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float64x4, _mm256_castsi256_pd)

/** Reverse **/
inline v_uint8x32 v_reverse(const v_uint8x32 &a)
{
    static const __m256i perm = _mm256_setr_epi8(
            15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
            15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    __m256i vec = _mm256_shuffle_epi8(a.val, perm);
    return v_uint8x32(_mm256_permute2x128_si256(vec, vec, 1));
}

inline v_int8x32 v_reverse(const v_int8x32 &a)
{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }

inline v_uint16x16 v_reverse(const v_uint16x16 &a)
{
    static const __m256i perm = _mm256_setr_epi8(
            14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1,
            14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
    __m256i vec = _mm256_shuffle_epi8(a.val, perm);
    return v_uint16x16(_mm256_permute2x128_si256(vec, vec, 1));
}

inline v_int16x16 v_reverse(const v_int16x16 &a)
{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }

inline v_uint32x8 v_reverse(const v_uint32x8 &a)
{
    static const __m256i perm = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
    return v_uint32x8(_mm256_permutevar8x32_epi32(a.val, perm));
}

inline v_int32x8 v_reverse(const v_int32x8 &a)
{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }

inline v_float32x8 v_reverse(const v_float32x8 &a)
{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }

inline v_uint64x4 v_reverse(const v_uint64x4 &a)
{
    return v_uint64x4(_mm256_permute4x64_epi64(a.val, _MM_SHUFFLE(0, 1, 2, 3)));
}

inline v_int64x4 v_reverse(const v_int64x4 &a)
{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }

inline v_float64x4 v_reverse(const v_float64x4 &a)
{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }

////////// Reduce and mask /////////

/** Reduce **/
inline unsigned v_reduce_sum(const v_uint8x32& a)
{
    __m256i half = _mm256_sad_epu8(a.val, _mm256_setzero_si256());
    __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half));
    return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter)));
}
inline int v_reduce_sum(const v_int8x32& a)
{
    __m256i half = _mm256_sad_epu8(_mm256_xor_si256(a.val, _mm256_set1_epi8((schar)-128)), _mm256_setzero_si256());
    __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half));
    return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))) - 4096;
}
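
// Editor's note (illustration, not in the original source): _mm256_sad_epu8
// against zero sums each run of 8 bytes into a 64-bit lane (sum of |x - 0|);
// the extract/add steps then fold the four partial sums into one scalar.
// The signed variant first biases every byte by +128 via XOR and undoes the
// total bias (32 * 128 = 4096) at the end.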

#define OPENCV_HAL_IMPL_AVX_REDUCE_32(_Tpvec, sctype, func, intrin) \
inline sctype v_reduce_##func(const _Tpvec& a) \
{ \
    __m128i val = intrin(_v256_extract_low(a.val), _v256_extract_high(a.val)); \
    val = intrin(val, _mm_srli_si128(val,8)); \
    val = intrin(val, _mm_srli_si128(val,4)); \
    val = intrin(val, _mm_srli_si128(val,2)); \
    val = intrin(val, _mm_srli_si128(val,1)); \
    return (sctype)_mm_cvtsi128_si32(val); \
}

OPENCV_HAL_IMPL_AVX_REDUCE_32(v_uint8x32, uchar, min, _mm_min_epu8)
OPENCV_HAL_IMPL_AVX_REDUCE_32(v_int8x32, schar, min, _mm_min_epi8)
OPENCV_HAL_IMPL_AVX_REDUCE_32(v_uint8x32, uchar, max, _mm_max_epu8)
OPENCV_HAL_IMPL_AVX_REDUCE_32(v_int8x32, schar, max, _mm_max_epi8)

#define OPENCV_HAL_IMPL_AVX_REDUCE_16(_Tpvec, sctype, func, intrin) \
inline sctype v_reduce_##func(const _Tpvec& a) \
{ \

@@ -1068,38 +1194,13 @@ OPENCV_HAL_IMPL_AVX_REDUCE_8(v_int32x8, int, max, _mm_max_epi32)
    __m128 v1 = _v256_extract_high(a.val); \
    v0 = intrin(v0, v1); \
    v0 = intrin(v0, _mm_permute_ps(v0, _MM_SHUFFLE(0, 0, 3, 2))); \
    v0 = intrin(v0, _mm_permute_ps(v0, _MM_SHUFFLE(0, 0, 0, 3))); \
    v0 = intrin(v0, _mm_permute_ps(v0, _MM_SHUFFLE(0, 0, 0, 1))); \
    return _mm_cvtss_f32(v0); \
}

OPENCV_HAL_IMPL_AVX_REDUCE_FLT(min, _mm_min_ps)
OPENCV_HAL_IMPL_AVX_REDUCE_FLT(max, _mm_max_ps)

inline ushort v_reduce_sum(const v_uint16x16& a)
{
    __m128i a0 = _v256_extract_low(a.val);
    __m128i a1 = _v256_extract_high(a.val);

    __m128i s0 = _mm_adds_epu16(a0, a1);
    s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 8));
    s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 4));
    s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 2));

    return (ushort)_mm_cvtsi128_si32(s0);
}

inline short v_reduce_sum(const v_int16x16& a)
{
    __m256i s0 = _mm256_hadds_epi16(a.val, a.val);
    s0 = _mm256_hadds_epi16(s0, s0);
    s0 = _mm256_hadds_epi16(s0, s0);

    __m128i s1 = _v256_extract_high(s0);
    s1 = _mm_adds_epi16(_v256_extract_low(s0), s1);

    return (short)_mm_cvtsi128_si32(s1);
}

inline int v_reduce_sum(const v_int32x8& a)
{
    __m256i s0 = _mm256_hadd_epi32(a.val, a.val);

@@ -1114,6 +1215,11 @@ inline int v_reduce_sum(const v_int32x8& a)
inline unsigned v_reduce_sum(const v_uint32x8& a)
{ return v_reduce_sum(v_reinterpret_as_s32(a)); }

inline int v_reduce_sum(const v_int16x16& a)
{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); }
inline unsigned v_reduce_sum(const v_uint16x16& a)
{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); }

inline float v_reduce_sum(const v_float32x8& a)
{
    __m256 s0 = _mm256_hadd_ps(a.val, a.val);

@@ -1125,6 +1231,18 @@ inline float v_reduce_sum(const v_float32x8& a)
    return _mm_cvtss_f32(s1);
}

inline uint64 v_reduce_sum(const v_uint64x4& a)
{
    uint64 CV_DECL_ALIGNED(32) idx[2];
    _mm_store_si128((__m128i*)idx, _mm_add_epi64(_v256_extract_low(a.val), _v256_extract_high(a.val)));
    return idx[0] + idx[1];
}
inline int64 v_reduce_sum(const v_int64x4& a)
{
    int64 CV_DECL_ALIGNED(32) idx[2];
    _mm_store_si128((__m128i*)idx, _mm_add_epi64(_v256_extract_low(a.val), _v256_extract_high(a.val)));
    return idx[0] + idx[1];
}
inline double v_reduce_sum(const v_float64x4& a)
{
    __m256d s0 = _mm256_hadd_pd(a.val, a.val);

@@ -1141,12 +1259,16 @@ inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b,

inline unsigned v_reduce_sad(const v_uint8x32& a, const v_uint8x32& b)
{
    return (unsigned)_v_cvtsi256_si32(_mm256_sad_epu8(a.val, b.val));
    __m256i half = _mm256_sad_epu8(a.val, b.val);
    __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half));
    return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter)));
}
inline unsigned v_reduce_sad(const v_int8x32& a, const v_int8x32& b)
{
    __m256i half = _mm256_set1_epi8(0x7f);
    return (unsigned)_v_cvtsi256_si32(_mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half)));
    half = _mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half));
    __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half));
    return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter)));
}
inline unsigned v_reduce_sad(const v_uint16x16& a, const v_uint16x16& b)
{

@@ -1175,26 +1297,39 @@ inline float v_reduce_sad(const v_float32x8& a, const v_float32x8& b)
}

/** Popcount **/
#define OPENCV_HAL_IMPL_AVX_POPCOUNT(_Tpvec) \
inline v_uint32x8 v_popcount(const _Tpvec& a) \
{ \
    const v_uint32x8 m1 = v256_setall_u32(0x55555555); \
    const v_uint32x8 m2 = v256_setall_u32(0x33333333); \
    const v_uint32x8 m4 = v256_setall_u32(0x0f0f0f0f); \
    v_uint32x8 p = v_reinterpret_as_u32(a); \
    p = ((p >> 1) & m1) + (p & m1); \
    p = ((p >> 2) & m2) + (p & m2); \
    p = ((p >> 4) & m4) + (p & m4); \
    p.val = _mm256_sad_epu8(p.val, _mm256_setzero_si256()); \
    return p; \
}

OPENCV_HAL_IMPL_AVX_POPCOUNT(v_uint8x32)
OPENCV_HAL_IMPL_AVX_POPCOUNT(v_int8x32)
OPENCV_HAL_IMPL_AVX_POPCOUNT(v_uint16x16)
OPENCV_HAL_IMPL_AVX_POPCOUNT(v_int16x16)
OPENCV_HAL_IMPL_AVX_POPCOUNT(v_uint32x8)
OPENCV_HAL_IMPL_AVX_POPCOUNT(v_int32x8)
inline v_uint8x32 v_popcount(const v_uint8x32& a)
{
    __m256i _popcnt_table = _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
                                             0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
    __m256i _popcnt_mask = _mm256_set1_epi8(0x0F);
    return v_uint8x32(_mm256_add_epi8(_mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256( a.val , _popcnt_mask)),
                                      _mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256(_mm256_srli_epi16(a.val, 4), _popcnt_mask))));
}
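
// Editor's addition (illustration, not in the original source): the table
// above maps each 4-bit value to its popcount, so a byte's count is
// tab[b & 0xF] + tab[b >> 4]. A scalar sketch of the same idea:
static inline int popcount8_demo(uchar b)
{
    static const uchar tab[16] = { 0,1,1,2, 1,2,2,3, 1,2,2,3, 2,3,3,4 };
    return tab[b & 0x0F] + tab[b >> 4]; // low nibble + high nibble
}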
inline v_uint16x16 v_popcount(const v_uint16x16& a)
{
    v_uint8x32 p = v_popcount(v_reinterpret_as_u8(a));
    p += v_rotate_right<1>(p);
    return v_reinterpret_as_u16(p) & v256_setall_u16(0x00ff);
}
inline v_uint32x8 v_popcount(const v_uint32x8& a)
{
    v_uint8x32 p = v_popcount(v_reinterpret_as_u8(a));
    p += v_rotate_right<1>(p);
    p += v_rotate_right<2>(p);
    return v_reinterpret_as_u32(p) & v256_setall_u32(0x000000ff);
}
inline v_uint64x4 v_popcount(const v_uint64x4& a)
{
    return v_uint64x4(_mm256_sad_epu8(v_popcount(v_reinterpret_as_u8(a)).val, _mm256_setzero_si256()));
}
inline v_uint8x32 v_popcount(const v_int8x32& a)
{ return v_popcount(v_reinterpret_as_u8(a)); }
inline v_uint16x16 v_popcount(const v_int16x16& a)
{ return v_popcount(v_reinterpret_as_u16(a)); }
inline v_uint32x8 v_popcount(const v_int32x8& a)
{ return v_popcount(v_reinterpret_as_u32(a)); }
inline v_uint64x4 v_popcount(const v_int64x4& a)
{ return v_popcount(v_reinterpret_as_u64(a)); }

/** Mask **/
inline int v_signmask(const v_int8x32& a)

@@ -1203,62 +1338,54 @@ inline int v_signmask(const v_uint8x32& a)
{ return v_signmask(v_reinterpret_as_s8(a)); }

inline int v_signmask(const v_int16x16& a)
{
    v_int8x32 v = v_int8x32(_mm256_packs_epi16(a.val, a.val));
    return v_signmask(v) & 255;
}
{ return v_signmask(v_pack(a, a)) & 0xFFFF; }
inline int v_signmask(const v_uint16x16& a)
{ return v_signmask(v_reinterpret_as_s16(a)); }

inline int v_signmask(const v_int32x8& a)
{
    __m256i a16 = _mm256_packs_epi32(a.val, a.val);
    v_int8x32 v = v_int8x32(_mm256_packs_epi16(a16, a16));
    return v_signmask(v) & 15;
}
inline int v_signmask(const v_uint32x8& a)
{ return v_signmask(v_reinterpret_as_s32(a)); }

inline int v_signmask(const v_float32x8& a)
{ return _mm256_movemask_ps(a.val); }
inline int v_signmask(const v_float64x4& a)
{ return _mm256_movemask_pd(a.val); }

inline int v_signmask(const v_int32x8& a)
{ return v_signmask(v_reinterpret_as_f32(a)); }
inline int v_signmask(const v_uint32x8& a)
{ return v_signmask(v_reinterpret_as_f32(a)); }

inline int v_signmask(const v_int64x4& a)
{ return v_signmask(v_reinterpret_as_f64(a)); }
inline int v_signmask(const v_uint64x4& a)
{ return v_signmask(v_reinterpret_as_f64(a)); }

inline int v_scan_forward(const v_int8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
inline int v_scan_forward(const v_uint8x32& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
inline int v_scan_forward(const v_int16x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; }
inline int v_scan_forward(const v_uint16x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; }
inline int v_scan_forward(const v_int32x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; }
inline int v_scan_forward(const v_uint32x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; }
inline int v_scan_forward(const v_float32x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; }
inline int v_scan_forward(const v_int64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }
inline int v_scan_forward(const v_uint64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }
inline int v_scan_forward(const v_float64x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }

/** Checks **/
#define OPENCV_HAL_IMPL_AVX_CHECK(_Tpvec, and_op, allmask) \
inline bool v_check_all(const _Tpvec& a) \
{ \
    int mask = v_signmask(v_reinterpret_as_s8(a)); \
    return and_op(mask, allmask) == allmask; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
    int mask = v_signmask(v_reinterpret_as_s8(a)); \
    return and_op(mask, allmask) != 0; \
}

OPENCV_HAL_IMPL_AVX_CHECK(v_uint8x32, OPENCV_HAL_1ST, -1)
OPENCV_HAL_IMPL_AVX_CHECK(v_int8x32, OPENCV_HAL_1ST, -1)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint16x16, OPENCV_HAL_AND, (int)0xaaaa)
OPENCV_HAL_IMPL_AVX_CHECK(v_int16x16, OPENCV_HAL_AND, (int)0xaaaa)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint32x8, OPENCV_HAL_AND, (int)0x8888)
OPENCV_HAL_IMPL_AVX_CHECK(v_int32x8, OPENCV_HAL_AND, (int)0x8888)

#define OPENCV_HAL_IMPL_AVX_CHECK_FLT(_Tpvec, allmask) \
inline bool v_check_all(const _Tpvec& a) \
{ \
    int mask = v_signmask(a); \
    return mask == allmask; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
    int mask = v_signmask(a); \
    return mask != 0; \
}

OPENCV_HAL_IMPL_AVX_CHECK_FLT(v_float32x8, 255)
OPENCV_HAL_IMPL_AVX_CHECK_FLT(v_float64x4, 15)
#define OPENCV_HAL_IMPL_AVX_CHECK(_Tpvec, allmask) \
inline bool v_check_all(const _Tpvec& a) { return v_signmask(a) == allmask; } \
inline bool v_check_any(const _Tpvec& a) { return v_signmask(a) != 0; }
OPENCV_HAL_IMPL_AVX_CHECK(v_uint8x32, -1)
OPENCV_HAL_IMPL_AVX_CHECK(v_int8x32, -1)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint32x8, 255)
OPENCV_HAL_IMPL_AVX_CHECK(v_int32x8, 255)
OPENCV_HAL_IMPL_AVX_CHECK(v_uint64x4, 15)
OPENCV_HAL_IMPL_AVX_CHECK(v_int64x4, 15)
OPENCV_HAL_IMPL_AVX_CHECK(v_float32x8, 255)
OPENCV_HAL_IMPL_AVX_CHECK(v_float64x4, 15)

#define OPENCV_HAL_IMPL_AVX_CHECK_SHORT(_Tpvec) \
inline bool v_check_all(const _Tpvec& a) { return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) == 0xaaaaaaaa; } \
inline bool v_check_any(const _Tpvec& a) { return (v_signmask(v_reinterpret_as_s8(a)) & 0xaaaaaaaa) != 0; }
OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_uint16x16)
OPENCV_HAL_IMPL_AVX_CHECK_SHORT(v_int16x16)

////////// Other math /////////

@@ -1400,7 +1527,7 @@ inline v_float32x8 v_cvt_f32(const v_float64x4& a)
inline v_float32x8 v_cvt_f32(const v_float64x4& a, const v_float64x4& b)
{
    __m128 af = _mm256_cvtpd_ps(a.val), bf = _mm256_cvtpd_ps(b.val);
    return v_float32x8(_mm256_insertf128_ps(_mm256_castps128_ps256(af), bf, 1));
    return v_float32x8(_v256_combine(af, bf));
}

inline v_float64x4 v_cvt_f64(const v_int32x8& a)

@@ -1415,6 +1542,28 @@ inline v_float64x4 v_cvt_f64(const v_float32x8& a)
inline v_float64x4 v_cvt_f64_high(const v_float32x8& a)
{ return v_float64x4(_mm256_cvtps_pd(_v256_extract_high(a.val))); }

// from (Mysticial and wim) https://stackoverflow.com/q/41144668
inline v_float64x4 v_cvt_f64(const v_int64x4& v)
{
    // constants encoded as floating-point
    __m256i magic_i_lo   = _mm256_set1_epi64x(0x4330000000000000); // 2^52
    __m256i magic_i_hi32 = _mm256_set1_epi64x(0x4530000080000000); // 2^84 + 2^63
    __m256i magic_i_all  = _mm256_set1_epi64x(0x4530000080100000); // 2^84 + 2^63 + 2^52
    __m256d magic_d_all  = _mm256_castsi256_pd(magic_i_all);

    // Blend the 32 lowest significant bits of v with magic_int_lo
    __m256i v_lo = _mm256_blend_epi32(magic_i_lo, v.val, 0x55);
    // Extract the 32 most significant bits of v
    __m256i v_hi = _mm256_srli_epi64(v.val, 32);
    // Flip the msb of v_hi and blend with 0x45300000
    v_hi = _mm256_xor_si256(v_hi, magic_i_hi32);
    // Compute in double precision
    __m256d v_hi_dbl = _mm256_sub_pd(_mm256_castsi256_pd(v_hi), magic_d_all);
    // (v_hi - magic_d_all) + v_lo; do not assume associativity of floating point addition
    __m256d result = _mm256_add_pd(v_hi_dbl, _mm256_castsi256_pd(v_lo));
    return v_float64x4(result);
}
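
// Editor's note (illustration of the constants above, not in the original
// source): writing x = hi*2^32 + lo, OR-ing lo into the mantissa of 2^52
// yields the exact double 2^52 + lo, and placing (hi XOR 2^31) under
// 2^84 + 2^63 yields 2^84 + 2^63 + hi*2^32 once the sign bias cancels.
// Subtracting magic_d_all = 2^84 + 2^63 + 2^52 and adding the low part
// therefore reconstructs hi*2^32 + lo, correctly rounded to double even when
// |x| exceeds 2^53.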

////////////// Lookup table access ////////////////////

inline v_int8x32 v256_lut(const schar* tab, const int* idx)

@@ -1474,7 +1623,7 @@ inline v_int32x8 v256_lut_pairs(const int* tab, const int* idx)
}
inline v_int32x8 v256_lut_quads(const int* tab, const int* idx)
{
    return v_int32x8(_mm256_insertf128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i*)(tab + idx[0]))), _mm_loadu_si128((const __m128i*)(tab + idx[1])), 0x1));
    return v_int32x8(_v256_combine(_mm_loadu_si128((const __m128i*)(tab + idx[0])), _mm_loadu_si128((const __m128i*)(tab + idx[1]))));
}
inline v_uint32x8 v256_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut((const int *)tab, idx)); }
inline v_uint32x8 v256_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v256_lut_pairs((const int *)tab, idx)); }

@@ -1490,7 +1639,7 @@ inline v_int64x4 v256_lut(const int64* tab, const int* idx)
}
inline v_int64x4 v256_lut_pairs(const int64* tab, const int* idx)
{
    return v_int64x4(_mm256_insertf128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i*)(tab + idx[0]))), _mm_loadu_si128((const __m128i*)(tab + idx[1])), 0x1));
    return v_int64x4(_v256_combine(_mm_loadu_si128((const __m128i*)(tab + idx[0])), _mm_loadu_si128((const __m128i*)(tab + idx[1]))));
}
inline v_uint64x4 v256_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut((const int64 *)tab, idx)); }
inline v_uint64x4 v256_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v256_lut_pairs((const int64 *)tab, idx)); }

@@ -1506,7 +1655,7 @@ inline v_float64x4 v256_lut(const double* tab, const int* idx)
{
    return v_float64x4(_mm256_i32gather_pd(tab, _mm_loadu_si128((const __m128i*)idx), 8));
}
inline v_float64x4 v256_lut_pairs(const double* tab, const int* idx) { return v_float64x4(_mm256_insertf128_pd(_mm256_castpd128_pd256(_mm_loadu_pd(tab + idx[0])), _mm_loadu_pd(tab + idx[1]), 0x1)); }
inline v_float64x4 v256_lut_pairs(const double* tab, const int* idx) { return v_float64x4(_v256_combine(_mm_loadu_pd(tab + idx[0]), _mm_loadu_pd(tab + idx[1]))); }

inline v_int32x8 v_lut(const int* tab, const v_int32x8& idxvec)
{

@@ -1622,12 +1771,165 @@ inline v_float32x8 v_pack_triplets(const v_float32x8& vec)

////////// Matrix operations /////////

//////// Dot Product ////////

// 16 >> 32
inline v_int32x8 v_dotprod(const v_int16x16& a, const v_int16x16& b)
{ return v_int32x8(_mm256_madd_epi16(a.val, b.val)); }
inline v_int32x8 v_dotprod(const v_int16x16& a, const v_int16x16& b, const v_int32x8& c)
{ return v_dotprod(a, b) + c; }

// 32 >> 64
inline v_int64x4 v_dotprod(const v_int32x8& a, const v_int32x8& b)
{
    __m256i even = _mm256_mul_epi32(a.val, b.val);
    __m256i odd = _mm256_mul_epi32(_mm256_srli_epi64(a.val, 32), _mm256_srli_epi64(b.val, 32));
    return v_int64x4(_mm256_add_epi64(even, odd));
}
inline v_int64x4 v_dotprod(const v_int32x8& a, const v_int32x8& b, const v_int64x4& c)
{ return v_dotprod(a, b) + c; }

// 8 >> 32
inline v_uint32x8 v_dotprod_expand(const v_uint8x32& a, const v_uint8x32& b)
{
    __m256i even_m = _mm256_set1_epi32(0xFF00FF00);
    __m256i even_a = _mm256_blendv_epi8(a.val, _mm256_setzero_si256(), even_m);
    __m256i odd_a  = _mm256_srli_epi16(a.val, 8);

    __m256i even_b = _mm256_blendv_epi8(b.val, _mm256_setzero_si256(), even_m);
    __m256i odd_b  = _mm256_srli_epi16(b.val, 8);

    __m256i prod0  = _mm256_madd_epi16(even_a, even_b);
    __m256i prod1  = _mm256_madd_epi16(odd_a, odd_b);
    return v_uint32x8(_mm256_add_epi32(prod0, prod1));
}
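
// Editor's note (illustration, not in the original source): the 0xFF00FF00
// byte mask zeroes the odd bytes, leaving even-indexed bytes as 16-bit
// values, while srli_epi16(.., 8) isolates the odd bytes. _mm256_madd_epi16
// is safe here because 8-bit products (max 255*255) fit the signed
// 16x16 -> 32-bit multiply-add, and the two partial sums are then added.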
inline v_uint32x8 v_dotprod_expand(const v_uint8x32& a, const v_uint8x32& b, const v_uint32x8& c)
{ return v_dotprod_expand(a, b) + c; }

inline v_int32x8 v_dotprod_expand(const v_int8x32& a, const v_int8x32& b)
{
    __m256i even_a = _mm256_srai_epi16(_mm256_bslli_epi128(a.val, 1), 8);
    __m256i odd_a  = _mm256_srai_epi16(a.val, 8);

    __m256i even_b = _mm256_srai_epi16(_mm256_bslli_epi128(b.val, 1), 8);
    __m256i odd_b  = _mm256_srai_epi16(b.val, 8);

    __m256i prod0  = _mm256_madd_epi16(even_a, even_b);
    __m256i prod1  = _mm256_madd_epi16(odd_a, odd_b);
    return v_int32x8(_mm256_add_epi32(prod0, prod1));
}
inline v_int32x8 v_dotprod_expand(const v_int8x32& a, const v_int8x32& b, const v_int32x8& c)
{ return v_dotprod_expand(a, b) + c; }

// 16 >> 64
inline v_uint64x4 v_dotprod_expand(const v_uint16x16& a, const v_uint16x16& b)
{
    __m256i mullo = _mm256_mullo_epi16(a.val, b.val);
    __m256i mulhi = _mm256_mulhi_epu16(a.val, b.val);
    __m256i mul0 = _mm256_unpacklo_epi16(mullo, mulhi);
    __m256i mul1 = _mm256_unpackhi_epi16(mullo, mulhi);

    __m256i p02 = _mm256_blend_epi32(mul0, _mm256_setzero_si256(), 0xAA);
    __m256i p13 = _mm256_srli_epi64(mul0, 32);
    __m256i p46 = _mm256_blend_epi32(mul1, _mm256_setzero_si256(), 0xAA);
    __m256i p57 = _mm256_srli_epi64(mul1, 32);

    __m256i p15_ = _mm256_add_epi64(p02, p13);
    __m256i p9d_ = _mm256_add_epi64(p46, p57);

    return v_uint64x4(_mm256_add_epi64(
        _mm256_unpacklo_epi64(p15_, p9d_),
        _mm256_unpackhi_epi64(p15_, p9d_)
    ));
}
inline v_uint64x4 v_dotprod_expand(const v_uint16x16& a, const v_uint16x16& b, const v_uint64x4& c)
{ return v_dotprod_expand(a, b) + c; }

inline v_int64x4 v_dotprod_expand(const v_int16x16& a, const v_int16x16& b)
{
    __m256i prod = _mm256_madd_epi16(a.val, b.val);
    __m256i sign = _mm256_srai_epi32(prod, 31);

    __m256i lo = _mm256_unpacklo_epi32(prod, sign);
    __m256i hi = _mm256_unpackhi_epi32(prod, sign);

    return v_int64x4(_mm256_add_epi64(
        _mm256_unpacklo_epi64(lo, hi),
        _mm256_unpackhi_epi64(lo, hi)
    ));
}
inline v_int64x4 v_dotprod_expand(const v_int16x16& a, const v_int16x16& b, const v_int64x4& c)
{ return v_dotprod_expand(a, b) + c; }

// 32 >> 64f
inline v_float64x4 v_dotprod_expand(const v_int32x8& a, const v_int32x8& b)
{ return v_cvt_f64(v_dotprod(a, b)); }
inline v_float64x4 v_dotprod_expand(const v_int32x8& a, const v_int32x8& b, const v_float64x4& c)
{ return v_dotprod_expand(a, b) + c; }

//////// Fast Dot Product ////////

// 16 >> 32
inline v_int32x8 v_dotprod_fast(const v_int16x16& a, const v_int16x16& b)
{ return v_dotprod(a, b); }
inline v_int32x8 v_dotprod_fast(const v_int16x16& a, const v_int16x16& b, const v_int32x8& c)
{ return v_dotprod(a, b, c); }

// 32 >> 64
inline v_int64x4 v_dotprod_fast(const v_int32x8& a, const v_int32x8& b)
{ return v_dotprod(a, b); }
inline v_int64x4 v_dotprod_fast(const v_int32x8& a, const v_int32x8& b, const v_int64x4& c)
{ return v_dotprod(a, b, c); }

// 8 >> 32
inline v_uint32x8 v_dotprod_expand_fast(const v_uint8x32& a, const v_uint8x32& b)
{ return v_dotprod_expand(a, b); }
inline v_uint32x8 v_dotprod_expand_fast(const v_uint8x32& a, const v_uint8x32& b, const v_uint32x8& c)
{ return v_dotprod_expand(a, b, c); }

inline v_int32x8 v_dotprod_expand_fast(const v_int8x32& a, const v_int8x32& b)
{ return v_dotprod_expand(a, b); }
inline v_int32x8 v_dotprod_expand_fast(const v_int8x32& a, const v_int8x32& b, const v_int32x8& c)
{ return v_dotprod_expand(a, b, c); }

// 16 >> 64
inline v_uint64x4 v_dotprod_expand_fast(const v_uint16x16& a, const v_uint16x16& b)
{
    __m256i mullo = _mm256_mullo_epi16(a.val, b.val);
    __m256i mulhi = _mm256_mulhi_epu16(a.val, b.val);
    __m256i mul0 = _mm256_unpacklo_epi16(mullo, mulhi);
    __m256i mul1 = _mm256_unpackhi_epi16(mullo, mulhi);

    __m256i p02 = _mm256_blend_epi32(mul0, _mm256_setzero_si256(), 0xAA);
    __m256i p13 = _mm256_srli_epi64(mul0, 32);
    __m256i p46 = _mm256_blend_epi32(mul1, _mm256_setzero_si256(), 0xAA);
    __m256i p57 = _mm256_srli_epi64(mul1, 32);

    __m256i p15_ = _mm256_add_epi64(p02, p13);
    __m256i p9d_ = _mm256_add_epi64(p46, p57);

    return v_uint64x4(_mm256_add_epi64(p15_, p9d_));
}
inline v_uint64x4 v_dotprod_expand_fast(const v_uint16x16& a, const v_uint16x16& b, const v_uint64x4& c)
{ return v_dotprod_expand_fast(a, b) + c; }

inline v_int64x4 v_dotprod_expand_fast(const v_int16x16& a, const v_int16x16& b)
{
    __m256i prod = _mm256_madd_epi16(a.val, b.val);
    __m256i sign = _mm256_srai_epi32(prod, 31);
    __m256i lo = _mm256_unpacklo_epi32(prod, sign);
    __m256i hi = _mm256_unpackhi_epi32(prod, sign);
    return v_int64x4(_mm256_add_epi64(lo, hi));
}
inline v_int64x4 v_dotprod_expand_fast(const v_int16x16& a, const v_int16x16& b, const v_int64x4& c)
{ return v_dotprod_expand_fast(a, b) + c; }

// 32 >> 64f
inline v_float64x4 v_dotprod_expand_fast(const v_int32x8& a, const v_int32x8& b)
{ return v_dotprod_expand(a, b); }
inline v_float64x4 v_dotprod_expand_fast(const v_int32x8& a, const v_int32x8& b, const v_float64x4& c)
{ return v_dotprod_expand(a, b, c); }

#define OPENCV_HAL_AVX_SPLAT2_PS(a, im) \
    v_float32x8(_mm256_permute_ps(a.val, _MM_SHUFFLE(im, im, im, im)))

@@ -1956,6 +2258,85 @@ OPENCV_HAL_IMPL_AVX_EXTRACT(v_int64x4)
OPENCV_HAL_IMPL_AVX_EXTRACT(v_float32x8)
OPENCV_HAL_IMPL_AVX_EXTRACT(v_float64x4)

template<int i>
inline uchar v_extract_n(v_uint8x32 a)
{
    return (uchar)_v256_extract_epi8<i>(a.val);
}

template<int i>
inline schar v_extract_n(v_int8x32 a)
{
    return (schar)v_extract_n<i>(v_reinterpret_as_u8(a));
}

template<int i>
inline ushort v_extract_n(v_uint16x16 a)
{
    return (ushort)_v256_extract_epi16<i>(a.val);
}

template<int i>
inline short v_extract_n(v_int16x16 a)
{
    return (short)v_extract_n<i>(v_reinterpret_as_u16(a));
}

template<int i>
inline uint v_extract_n(v_uint32x8 a)
{
    return (uint)_v256_extract_epi32<i>(a.val);
}

template<int i>
inline int v_extract_n(v_int32x8 a)
{
    return (int)v_extract_n<i>(v_reinterpret_as_u32(a));
}

template<int i>
inline uint64 v_extract_n(v_uint64x4 a)
{
    return (uint64)_v256_extract_epi64<i>(a.val);
}

template<int i>
inline int64 v_extract_n(v_int64x4 v)
{
    return (int64)v_extract_n<i>(v_reinterpret_as_u64(v));
}

template<int i>
inline float v_extract_n(v_float32x8 v)
{
    union { uint iv; float fv; } d;
    d.iv = v_extract_n<i>(v_reinterpret_as_u32(v));
    return d.fv;
}

template<int i>
inline double v_extract_n(v_float64x4 v)
{
    union { uint64 iv; double dv; } d;
    d.iv = v_extract_n<i>(v_reinterpret_as_u64(v));
    return d.dv;
}

template<int i>
inline v_uint32x8 v_broadcast_element(v_uint32x8 a)
{
    static const __m256i perm = _mm256_set1_epi32((char)i);
    return v_uint32x8(_mm256_permutevar8x32_epi32(a.val, perm));
}

template<int i>
inline v_int32x8 v_broadcast_element(const v_int32x8 &a)
{ return v_reinterpret_as_s32(v_broadcast_element<i>(v_reinterpret_as_u32(a))); }

template<int i>
inline v_float32x8 v_broadcast_element(const v_float32x8 &a)
{ return v_reinterpret_as_f32(v_broadcast_element<i>(v_reinterpret_as_u32(a))); }

///////////////////// load deinterleave /////////////////////////////

@@ -2740,29 +3121,41 @@ OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float32x8, float, f32, v_uint32x8, un
OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int64x4, int64, s64, v_uint64x4, uint64, u64)
OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, uint64, u64)

//
// FP16
//

inline v_float32x8 v256_load_expand(const float16_t* ptr)
{
#if CV_FP16
    return v_float32x8(_mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
#else
    float CV_DECL_ALIGNED(32) buf[8];
    for (int i = 0; i < 8; i++)
        buf[i] = (float)ptr[i];
    return v256_load_aligned(buf);
#endif
}

inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
{
#if CV_FP16
    __m128i ah = _mm256_cvtps_ph(a.val, 0);
    _mm_storeu_si128((__m128i*)ptr, ah);
#else
    float CV_DECL_ALIGNED(32) buf[8];
    v_store_aligned(buf, a);
    for (int i = 0; i < 8; i++)
        ptr[i] = float16_t(buf[i]);
#endif
}

//
// end of FP16
//

inline void v256_cleanup() { _mm256_zeroall(); }
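
// Editor's note (assumption about intent, not in the original source):
// _mm256_zeroall() clears all YMM registers, which also resets the dirty
// upper-register state that causes AVX->SSE transition penalties on some
// CPUs; _mm256_zeroupper() would be the narrower alternative.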

//! @name Check SIMD256 support
//! @{
//! @brief Check CPU capability of SIMD operation
static inline bool hasSIMD256()
{
    return (CV_CPU_HAS_SUPPORT_AVX2) ? true : false;
}
//! @}
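
// Illustrative sketch (assumption; process_avx2/process_baseline are
// hypothetical helpers, not part of the original source): typical runtime
// dispatch built on this check would look like
//   if (hasSIMD256())
//       process_avx2(src, dst, n);      // AVX2 / SIMD256 code path
//   else
//       process_baseline(src, dst, n);  // 128-bit or scalar fallback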

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END

//! @endcond

File diff suppressed because it is too large
File diff suppressed because it is too large

@@ -14,9 +14,32 @@ namespace cv
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN

/** Types **/
#if CV__SIMD_FORWARD == 512
// [todo] 512
#error "AVX512 Not implemented yet"
#if CV__SIMD_FORWARD == 1024
// [todo] 1024
#error "1024-long ops not implemented yet"
#elif CV__SIMD_FORWARD == 512
// 512
#define __CV_VX(fun) v512_##fun
#define __CV_V_UINT8 v_uint8x64
#define __CV_V_INT8 v_int8x64
#define __CV_V_UINT16 v_uint16x32
#define __CV_V_INT16 v_int16x32
#define __CV_V_UINT32 v_uint32x16
#define __CV_V_INT32 v_int32x16
#define __CV_V_UINT64 v_uint64x8
#define __CV_V_INT64 v_int64x8
#define __CV_V_FLOAT32 v_float32x16
#define __CV_V_FLOAT64 v_float64x8
struct v_uint8x64;
struct v_int8x64;
struct v_uint16x32;
struct v_int16x32;
struct v_uint32x16;
struct v_int32x16;
struct v_uint64x8;
struct v_int64x8;
struct v_float32x16;
struct v_float64x8;
#elif CV__SIMD_FORWARD == 256
// 256
#define __CV_VX(fun) v256_##fun

@@ -137,6 +160,16 @@ void v_mul_expand(const __CV_V_UINT32&, const __CV_V_UINT32&, __CV_V_UINT64&, __
void v_mul_expand(const __CV_V_INT32&, const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
#endif

// Conversions
__CV_V_FLOAT32 v_cvt_f32(const __CV_V_INT32& a);
__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a);
__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a, const __CV_V_FLOAT64& b);
__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT32& a);
__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_INT32& a);
__CV_V_FLOAT64 v_cvt_f64(const __CV_V_FLOAT32& a);
__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_FLOAT32& a);
__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT64& a);

/** Cleanup **/
#undef CV__SIMD_FORWARD
#undef __CV_VX

File diff suppressed because it is too large

@@ -56,29 +56,85 @@ namespace cv
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN

#define CV_SIMD128 1
#if defined(__aarch64__)
#if defined(__aarch64__) || defined(_M_ARM64)
#define CV_SIMD128_64F 1
#else
#define CV_SIMD128_64F 0
#endif

// The following macro checks if the code is being compiled for the
// AArch64 execution state of Armv8, to enable the 128-bit
// intrinsics. The macro `__ARM_64BIT_STATE` is the one recommended by
// the Arm C Language Extension (ACLE) specifications [1] to check the
// availability of 128-bit intrinsics, and it is supported by clang
// and gcc. The macro `_M_ARM64` is the equivalent one for Microsoft
// Visual Studio [2].
//
// [1] https://developer.arm.com/documentation/101028/0012/13--Advanced-SIMD--Neon--intrinsics
// [2] https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros
#if defined(__ARM_64BIT_STATE) || defined(_M_ARM64)
#define CV_NEON_AARCH64 1
#else
#define CV_NEON_AARCH64 0
#endif
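
// Illustrative sketch (assumption, not in the original source): CV_NEON_AARCH64
// is then used to gate A64-only instructions, e.g.
//   #if CV_NEON_AARCH64
//       float64x2_t s = vaddq_f64(a, b); // double-precision NEON, AArch64 only
//   #endif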
|
||||
|
||||
// TODO
|
||||
#define CV_NEON_DOT 0
|
||||
|
||||
//////////// Utils ////////////
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
#define OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv, _Tpvx2, suffix) \
|
||||
inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \
|
||||
{ c = vuzp1q_##suffix(a, b); d = vuzp2q_##suffix(a, b); }
|
||||
#define OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpv, _Tpvx2, suffix) \
|
||||
inline void _v128_unzip(const _Tpv&a, const _Tpv&b, _Tpv& c, _Tpv& d) \
|
||||
{ c = vuzp1_##suffix(a, b); d = vuzp2_##suffix(a, b); }
|
||||
#else
|
||||
#define OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv, _Tpvx2, suffix) \
|
||||
inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \
|
||||
{ _Tpvx2 ab = vuzpq_##suffix(a, b); c = ab.val[0]; d = ab.val[1]; }
|
||||
#define OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpv, _Tpvx2, suffix) \
|
||||
inline void _v128_unzip(const _Tpv& a, const _Tpv& b, _Tpv& c, _Tpv& d) \
|
||||
{ _Tpvx2 ab = vuzp_##suffix(a, b); c = ab.val[0]; d = ab.val[1]; }
|
||||
#endif
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix) \
|
||||
template <typename T> static inline \
|
||||
_Tpv vreinterpretq_##suffix##_f64(T a) { return (_Tpv) a; } \
|
||||
template <typename T> static inline \
|
||||
float64x2_t vreinterpretq_f64_##suffix(T a) { return (float64x2_t) a; }
|
||||
OPENCV_HAL_IMPL_NEON_REINTERPRET(uint8x16_t, u8)
|
||||
OPENCV_HAL_IMPL_NEON_REINTERPRET(int8x16_t, s8)
|
||||
OPENCV_HAL_IMPL_NEON_REINTERPRET(uint16x8_t, u16)
|
||||
OPENCV_HAL_IMPL_NEON_REINTERPRET(int16x8_t, s16)
|
||||
OPENCV_HAL_IMPL_NEON_REINTERPRET(uint32x4_t, u32)
|
||||
OPENCV_HAL_IMPL_NEON_REINTERPRET(int32x4_t, s32)
|
||||
OPENCV_HAL_IMPL_NEON_REINTERPRET(uint64x2_t, u64)
|
||||
OPENCV_HAL_IMPL_NEON_REINTERPRET(int64x2_t, s64)
|
||||
OPENCV_HAL_IMPL_NEON_REINTERPRET(float32x4_t, f32)
|
||||
template <typename T> static inline \
|
||||
_Tpv vreinterpretq_##suffix##_f64(T a) { return (_Tpv) a; } \
|
||||
template <typename T> static inline \
|
||||
float64x2_t vreinterpretq_f64_##suffix(T a) { return (float64x2_t) a; }
|
||||
#else
|
||||
#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix)
|
||||
#endif
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(_Tpv, _Tpvl, suffix) \
|
||||
OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv##_t, _Tpv##x2_t, suffix) \
|
||||
OPENCV_HAL_IMPL_NEON_UNZIP_L(_Tpvl##_t, _Tpvl##x2_t, suffix) \
|
||||
OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv##_t, suffix)
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(_Tpv, _Tpvl, suffix) \
|
||||
OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv##_t, suffix)
|
||||
|
||||
#define OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_F64(_Tpv, _Tpvl, suffix) \
|
||||
OPENCV_HAL_IMPL_NEON_UNZIP(_Tpv##_t, _Tpv##x2_t, suffix)
|
||||
|
||||
OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint8x16, uint8x8, u8)
|
||||
OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int8x16, int8x8, s8)
|
||||
OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint16x8, uint16x4, u16)
|
||||
OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int16x8, int16x4, s16)
|
||||
OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(uint32x4, uint32x2, u32)
|
||||
OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(int32x4, int32x2, s32)
|
||||
OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX(float32x4, float32x2, f32)
|
||||
OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(uint64x2, uint64x1, u64)
|
||||
OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_I64(int64x2, int64x1, s64)
|
||||
#if CV_SIMD128_64F
|
||||
OPENCV_HAL_IMPL_NEON_UTILS_SUFFIX_F64(float64x2, float64x1,f64)
|
||||
#endif
|
||||
|
||||
//////////// Types ////////////
|
||||
|
||||
struct v_uint8x16
|
||||
{
|
||||
typedef uchar lane_type;
|
||||
|
@@ -278,48 +334,6 @@ struct v_float64x2
};
#endif

#if CV_FP16
// Workaround for old compilers
static inline int16x4_t vreinterpret_s16_f16(float16x4_t a) { return (int16x4_t)a; }
static inline float16x4_t vreinterpret_f16_s16(int16x4_t a) { return (float16x4_t)a; }

static inline float16x4_t cv_vld1_f16(const void* ptr)
{
#ifndef vld1_f16 // APPLE compiler defines vld1_f16 as macro
    return vreinterpret_f16_s16(vld1_s16((const short*)ptr));
#else
    return vld1_f16((const __fp16*)ptr);
#endif
}
static inline void cv_vst1_f16(void* ptr, float16x4_t a)
{
#ifndef vst1_f16 // APPLE compiler defines vst1_f16 as macro
    vst1_s16((short*)ptr, vreinterpret_s16_f16(a));
#else
    vst1_f16((__fp16*)ptr, a);
#endif
}

#ifndef vdup_n_f16
#define vdup_n_f16(v) (float16x4_t){v, v, v, v}
#endif

#endif // CV_FP16

#if CV_FP16
inline v_float32x4 v128_load_fp16_f32(const short* ptr)
{
    float16x4_t a = cv_vld1_f16((const __fp16*)ptr);
    return v_float32x4(vcvt_f32_f16(a));
}

inline void v_store_fp16(short* ptr, const v_float32x4& a)
{
    float16x4_t fp16 = vcvt_f16_f32(a.val);
    cv_vst1_f16((short*)ptr, fp16);
}
#endif

#define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \
inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \
inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \
@@ -570,20 +584,292 @@ inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
    ));
}

//////// Dot Product ////////

// 16 >> 32
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
{
    int32x4_t c = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
    int32x4_t d = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
    int32x4x2_t cd = vuzpq_s32(c, d);
    return v_int32x4(vaddq_s32(cd.val[0], cd.val[1]));
    int16x8_t uzp1, uzp2;
    _v128_unzip(a.val, b.val, uzp1, uzp2);
    int16x4_t a0 = vget_low_s16(uzp1);
    int16x4_t b0 = vget_high_s16(uzp1);
    int16x4_t a1 = vget_low_s16(uzp2);
    int16x4_t b1 = vget_high_s16(uzp2);
    int32x4_t p = vmull_s16(a0, b0);
    return v_int32x4(vmlal_s16(p, a1, b1));
}

inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
{
    v_int32x4 s = v_dotprod(a, b);
    return v_int32x4(vaddq_s32(s.val , c.val));
    int16x8_t uzp1, uzp2;
    _v128_unzip(a.val, b.val, uzp1, uzp2);
    int16x4_t a0 = vget_low_s16(uzp1);
    int16x4_t b0 = vget_high_s16(uzp1);
    int16x4_t a1 = vget_low_s16(uzp2);
    int16x4_t b1 = vget_high_s16(uzp2);
    int32x4_t p = vmlal_s16(c.val, a0, b0);
    return v_int32x4(vmlal_s16(p, a1, b1));
}

// 32 >> 64
inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b)
{
    int32x4_t uzp1, uzp2;
    _v128_unzip(a.val, b.val, uzp1, uzp2);
    int32x2_t a0 = vget_low_s32(uzp1);
    int32x2_t b0 = vget_high_s32(uzp1);
    int32x2_t a1 = vget_low_s32(uzp2);
    int32x2_t b1 = vget_high_s32(uzp2);
    int64x2_t p = vmull_s32(a0, b0);
    return v_int64x2(vmlal_s32(p, a1, b1));
}
inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
{
    int32x4_t uzp1, uzp2;
    _v128_unzip(a.val, b.val, uzp1, uzp2);
    int32x2_t a0 = vget_low_s32(uzp1);
    int32x2_t b0 = vget_high_s32(uzp1);
    int32x2_t a1 = vget_low_s32(uzp2);
    int32x2_t b1 = vget_high_s32(uzp2);
    int64x2_t p = vmlal_s32(c.val, a0, b0);
    return v_int64x2(vmlal_s32(p, a1, b1));
}

// 8 >> 32
inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b)
{
#if CV_NEON_DOT
    return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val));
#else
    const uint8x16_t zero   = vreinterpretq_u8_u32(vdupq_n_u32(0));
    const uint8x16_t mask   = vreinterpretq_u8_u32(vdupq_n_u32(0x00FF00FF));
    const uint16x8_t zero32 = vreinterpretq_u16_u32(vdupq_n_u32(0));
    const uint16x8_t mask32 = vreinterpretq_u16_u32(vdupq_n_u32(0x0000FFFF));

    uint16x8_t even = vmulq_u16(vreinterpretq_u16_u8(vbslq_u8(mask, a.val, zero)),
                                vreinterpretq_u16_u8(vbslq_u8(mask, b.val, zero)));
    uint16x8_t odd  = vmulq_u16(vshrq_n_u16(vreinterpretq_u16_u8(a.val), 8),
                                vshrq_n_u16(vreinterpretq_u16_u8(b.val), 8));

    uint32x4_t s0 = vaddq_u32(vreinterpretq_u32_u16(vbslq_u16(mask32, even, zero32)),
                              vreinterpretq_u32_u16(vbslq_u16(mask32, odd, zero32)));
    uint32x4_t s1 = vaddq_u32(vshrq_n_u32(vreinterpretq_u32_u16(even), 16),
                              vshrq_n_u32(vreinterpretq_u32_u16(odd), 16));
    return v_uint32x4(vaddq_u32(s0, s1));
#endif
}
inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b,
                                   const v_uint32x4& c)
{
#if CV_NEON_DOT
    return v_uint32x4(vdotq_u32(c.val, a.val, b.val));
#else
    return v_dotprod_expand(a, b) + c;
#endif
}

inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b)
{
#if CV_NEON_DOT
    return v_int32x4(vdotq_s32(vdupq_n_s32(0), a.val, b.val));
#else
    int16x8_t p0 = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val));
    int16x8_t p1 = vmull_s8(vget_high_s8(a.val), vget_high_s8(b.val));
    int16x8_t uzp1, uzp2;
    _v128_unzip(p0, p1, uzp1, uzp2);
    int16x8_t sum = vaddq_s16(uzp1, uzp2);
    int16x4_t uzpl1, uzpl2;
    _v128_unzip(vget_low_s16(sum), vget_high_s16(sum), uzpl1, uzpl2);
    return v_int32x4(vaddl_s16(uzpl1, uzpl2));
#endif
}
inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b,
                                  const v_int32x4& c)
{
#if CV_NEON_DOT
    return v_int32x4(vdotq_s32(c.val, a.val, b.val));
#else
    return v_dotprod_expand(a, b) + c;
#endif
}

// 16 >> 64
inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b)
{
    const uint16x8_t zero = vreinterpretq_u16_u32(vdupq_n_u32(0));
    const uint16x8_t mask = vreinterpretq_u16_u32(vdupq_n_u32(0x0000FFFF));

    uint32x4_t even = vmulq_u32(vreinterpretq_u32_u16(vbslq_u16(mask, a.val, zero)),
                                vreinterpretq_u32_u16(vbslq_u16(mask, b.val, zero)));
    uint32x4_t odd = vmulq_u32(vshrq_n_u32(vreinterpretq_u32_u16(a.val), 16),
                               vshrq_n_u32(vreinterpretq_u32_u16(b.val), 16));
    uint32x4_t uzp1, uzp2;
    _v128_unzip(even, odd, uzp1, uzp2);
    uint64x2_t s0 = vaddl_u32(vget_low_u32(uzp1), vget_high_u32(uzp1));
    uint64x2_t s1 = vaddl_u32(vget_low_u32(uzp2), vget_high_u32(uzp2));
    return v_uint64x2(vaddq_u64(s0, s1));
}
inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
{ return v_dotprod_expand(a, b) + c; }

inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b)
{
    int32x4_t p0 = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
    int32x4_t p1 = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));

    int32x4_t uzp1, uzp2;
    _v128_unzip(p0, p1, uzp1, uzp2);
    int32x4_t sum = vaddq_s32(uzp1, uzp2);

    int32x2_t uzpl1, uzpl2;
    _v128_unzip(vget_low_s32(sum), vget_high_s32(sum), uzpl1, uzpl2);
    return v_int64x2(vaddl_s32(uzpl1, uzpl2));
}
inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b,
                                  const v_int64x2& c)
{ return v_dotprod_expand(a, b) + c; }

// 32 >> 64f
#if CV_SIMD128_64F
inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b)
{ return v_cvt_f64(v_dotprod(a, b)); }
inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b,
                                    const v_float64x2& c)
{ return v_dotprod_expand(a, b) + c; }
#endif
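
// Usage sketch (assumed caller-side helper, not part of this header): a
// 16-bit dot product accumulated through the three-operand v_dotprod above,
// then reduced to a scalar; the tail is handled with plain scalar code.
static inline int dot_s16(const short* a, const short* b, int n)
{
    v_int32x4 acc = v_setzero_s32();
    int i = 0;
    for (; i <= n - v_int16x8::nlanes; i += v_int16x8::nlanes)
        acc = v_dotprod(v_load(a + i), v_load(b + i), acc); // 8 products folded into 4 lanes
    int s = v_reduce_sum(acc);
    for (; i < n; ++i) // scalar tail
        s += a[i] * b[i];
    return s;
}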

//////// Fast Dot Product ////////

// 16 >> 32
inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b)
{
#if CV_NEON_AARCH64
    int32x4_t p = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
    return v_int32x4(vmlal_high_s16(p, a.val, b.val));
#else
    int16x4_t a0 = vget_low_s16(a.val);
    int16x4_t a1 = vget_high_s16(a.val);
    int16x4_t b0 = vget_low_s16(b.val);
    int16x4_t b1 = vget_high_s16(b.val);
    int32x4_t p = vmull_s16(a0, b0);
    return v_int32x4(vmlal_s16(p, a1, b1));
#endif
}
inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
{
#if CV_NEON_AARCH64
    int32x4_t p = vmlal_s16(c.val, vget_low_s16(a.val), vget_low_s16(b.val));
    return v_int32x4(vmlal_high_s16(p, a.val, b.val));
#else
    int16x4_t a0 = vget_low_s16(a.val);
    int16x4_t a1 = vget_high_s16(a.val);
    int16x4_t b0 = vget_low_s16(b.val);
    int16x4_t b1 = vget_high_s16(b.val);
    int32x4_t p = vmlal_s16(c.val, a0, b0);
    return v_int32x4(vmlal_s16(p, a1, b1));
#endif
}

// 32 >> 64
inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b)
{
#if CV_NEON_AARCH64
    int64x2_t p = vmull_s32(vget_low_s32(a.val), vget_low_s32(b.val));
    return v_int64x2(vmlal_high_s32(p, a.val, b.val));
#else
    int32x2_t a0 = vget_low_s32(a.val);
    int32x2_t a1 = vget_high_s32(a.val);
    int32x2_t b0 = vget_low_s32(b.val);
    int32x2_t b1 = vget_high_s32(b.val);
    int64x2_t p = vmull_s32(a0, b0);
    return v_int64x2(vmlal_s32(p, a1, b1));
#endif
}
inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
{
#if CV_NEON_AARCH64
    int64x2_t p = vmlal_s32(c.val, vget_low_s32(a.val), vget_low_s32(b.val));
    return v_int64x2(vmlal_high_s32(p, a.val, b.val));
#else
    int32x2_t a0 = vget_low_s32(a.val);
    int32x2_t a1 = vget_high_s32(a.val);
    int32x2_t b0 = vget_low_s32(b.val);
    int32x2_t b1 = vget_high_s32(b.val);
    int64x2_t p = vmlal_s32(c.val, a0, b0);
    return v_int64x2(vmlal_s32(p, a1, b1));
#endif
}

// 8 >> 32
inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b)
{
#if CV_NEON_DOT
    return v_uint32x4(vdotq_u32(vdupq_n_u32(0), a.val, b.val));
#else
    uint16x8_t p0 = vmull_u8(vget_low_u8(a.val), vget_low_u8(b.val));
    uint16x8_t p1 = vmull_u8(vget_high_u8(a.val), vget_high_u8(b.val));
    uint32x4_t s0 = vaddl_u16(vget_low_u16(p0), vget_low_u16(p1));
    uint32x4_t s1 = vaddl_u16(vget_high_u16(p0), vget_high_u16(p1));
    return v_uint32x4(vaddq_u32(s0, s1));
#endif
}
inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
{
#if CV_NEON_DOT
    return v_uint32x4(vdotq_u32(c.val, a.val, b.val));
#else
    return v_dotprod_expand_fast(a, b) + c;
#endif
}

inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b)
{
#if CV_NEON_DOT
    return v_int32x4(vdotq_s32(vdupq_n_s32(0), a.val, b.val));
#else
    int16x8_t prod = vmull_s8(vget_low_s8(a.val), vget_low_s8(b.val));
    prod = vmlal_s8(prod, vget_high_s8(a.val), vget_high_s8(b.val));
    return v_int32x4(vaddl_s16(vget_low_s16(prod), vget_high_s16(prod)));
#endif
}
inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
{
#if CV_NEON_DOT
    return v_int32x4(vdotq_s32(c.val, a.val, b.val));
#else
    return v_dotprod_expand_fast(a, b) + c;
#endif
}

// 16 >> 64
inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b)
{
    uint32x4_t p0 = vmull_u16(vget_low_u16(a.val), vget_low_u16(b.val));
    uint32x4_t p1 = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val));
    uint64x2_t s0 = vaddl_u32(vget_low_u32(p0), vget_high_u32(p0));
    uint64x2_t s1 = vaddl_u32(vget_low_u32(p1), vget_high_u32(p1));
    return v_uint64x2(vaddq_u64(s0, s1));
}
inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
{ return v_dotprod_expand_fast(a, b) + c; }

inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b)
{
    int32x4_t prod = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
    prod = vmlal_s16(prod, vget_high_s16(a.val), vget_high_s16(b.val));
    return v_int64x2(vaddl_s32(vget_low_s32(prod), vget_high_s32(prod)));
}
inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
{ return v_dotprod_expand_fast(a, b) + c; }

// 32 >> 64f
#if CV_SIMD128_64F
inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b)
{ return v_cvt_f64(v_dotprod_fast(a, b)); }
inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
{ return v_dotprod_expand_fast(a, b) + c; }
#endif
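
// The *_fast variants may pair lanes in a different order than the exact
// v_dotprod (low/high halves instead of unzipped even/odd pairs), so
// individual output lanes can differ while the reduced total stays equal:
//   v_reduce_sum(v_dotprod(a, b)) == v_reduce_sum(v_dotprod_fast(a, b))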


#define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \
OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \
OPENCV_HAL_IMPL_NEON_BIN_OP(|, _Tpvec, vorrq_##suffix) \

@@ -917,13 +1203,27 @@ OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_int64x2, s64)
OPENCV_HAL_IMPL_NEON_ROTATE_OP(v_float64x2, f64)
#endif

#if defined(__clang__) && defined(__aarch64__)
// avoid LD2 instruction. details: https://github.com/opencv/opencv/issues/14863
#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ \
    typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64; \
    uint64 v = *(unaligned_uint64*)ptr; \
    return _Tpvec(v_reinterpret_as_##suffix(v_uint64x2(v, (uint64)123456))); \
}
#else
#define OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); }
#endif

#define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix) \
inline _Tpvec v_load(const _Tp* ptr) \
{ return _Tpvec(vld1q_##suffix(ptr)); } \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ return _Tpvec(vld1q_##suffix(ptr)); } \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); } \
OPENCV_HAL_IMPL_NEON_LOAD_LOW_OP(_Tpvec, _Tp, suffix) \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \
inline void v_store(_Tp* ptr, const _Tpvec& a) \

@@ -952,6 +1252,45 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32)
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64)
#endif

inline unsigned v_reduce_sum(const v_uint8x16& a)
{
    uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(a.val));
    uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
    return vget_lane_u32(vpadd_u32(t1, t1), 0);
}
inline int v_reduce_sum(const v_int8x16& a)
{
    int32x4_t t0 = vpaddlq_s16(vpaddlq_s8(a.val));
    int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0));
    return vget_lane_s32(vpadd_s32(t1, t1), 0);
}
inline unsigned v_reduce_sum(const v_uint16x8& a)
{
    uint32x4_t t0 = vpaddlq_u16(a.val);
    uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0));
    return vget_lane_u32(vpadd_u32(t1, t1), 0);
}
inline int v_reduce_sum(const v_int16x8& a)
{
    int32x4_t t0 = vpaddlq_s16(a.val);
    int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0));
    return vget_lane_s32(vpadd_s32(t1, t1), 0);
}
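
// Usage sketch (assumed caller-side helper): summing a uchar buffer with the
// widening reductions above; leftover elements are handled in scalar code.
static inline unsigned sum_u8(const uchar* p, int n)
{
    unsigned s = 0;
    int i = 0;
    for (; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes)
        s += v_reduce_sum(v_load(p + i)); // 16 x u8 -> one 32-bit partial sum
    for (; i < n; ++i)
        s += p[i];
    return s;
}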

#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
    _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
    a0 = vp##vectorfunc##_##suffix(a0, a0); \
    a0 = vp##vectorfunc##_##suffix(a0, a0); \
    return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
}

OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, max, max, u8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_uint8x16, uint8x8, uchar, min, min, u8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, max, max, s8)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_16(v_int8x16, int8x8, schar, min, min, s8)

#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \

@@ -960,10 +1299,8 @@ inline scalartype v_reduce_##func(const _Tpvec& a) \
    return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
}

OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, sum, add, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, max, max, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, min, min, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, sum, add, s16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, max, max, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, ushort, min, min, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16)


@@ -984,10 +1321,14 @@ OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, sum, add, f32)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32)

inline uint64 v_reduce_sum(const v_uint64x2& a)
{ return vget_lane_u64(vadd_u64(vget_low_u64(a.val), vget_high_u64(a.val)),0); }
inline int64 v_reduce_sum(const v_int64x2& a)
{ return vget_lane_s64(vadd_s64(vget_low_s64(a.val), vget_high_s64(a.val)),0); }
#if CV_SIMD128_64F
inline double v_reduce_sum(const v_float64x2& a)
{
    return vgetq_lane_f64(a.val, 0) + vgetq_lane_f64(a.val, 1);
    return vaddvq_f64(a.val);
}
#endif


@@ -1049,21 +1390,22 @@ inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
    return vget_lane_f32(vpadd_f32(t1, t1), 0);
}

#define OPENCV_HAL_IMPL_NEON_POPCOUNT(_Tpvec, cast) \
inline v_uint32x4 v_popcount(const _Tpvec& a) \
{ \
    uint8x16_t t = vcntq_u8(cast(a.val)); \
    uint16x8_t t0 = vpaddlq_u8(t);  /* 16 -> 8 */ \
    uint32x4_t t1 = vpaddlq_u16(t0); /* 8 -> 4 */ \
    return v_uint32x4(t1); \
}

OPENCV_HAL_IMPL_NEON_POPCOUNT(v_uint8x16, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_NEON_POPCOUNT(v_uint16x8, vreinterpretq_u8_u16)
OPENCV_HAL_IMPL_NEON_POPCOUNT(v_uint32x4, vreinterpretq_u8_u32)
OPENCV_HAL_IMPL_NEON_POPCOUNT(v_int8x16, vreinterpretq_u8_s8)
OPENCV_HAL_IMPL_NEON_POPCOUNT(v_int16x8, vreinterpretq_u8_s16)
OPENCV_HAL_IMPL_NEON_POPCOUNT(v_int32x4, vreinterpretq_u8_s32)
inline v_uint8x16 v_popcount(const v_uint8x16& a)
{ return v_uint8x16(vcntq_u8(a.val)); }
inline v_uint8x16 v_popcount(const v_int8x16& a)
{ return v_uint8x16(vcntq_u8(vreinterpretq_u8_s8(a.val))); }
inline v_uint16x8 v_popcount(const v_uint16x8& a)
{ return v_uint16x8(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u16(a.val)))); }
inline v_uint16x8 v_popcount(const v_int16x8& a)
{ return v_uint16x8(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s16(a.val)))); }
inline v_uint32x4 v_popcount(const v_uint32x4& a)
{ return v_uint32x4(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u32(a.val))))); }
inline v_uint32x4 v_popcount(const v_int32x4& a)
{ return v_uint32x4(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s32(a.val))))); }
inline v_uint64x2 v_popcount(const v_uint64x2& a)
{ return v_uint64x2(vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(a.val)))))); }
inline v_uint64x2 v_popcount(const v_int64x2& a)
{ return v_uint64x2(vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s64(a.val)))))); }
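
// Sketch (assumed caller-side): vcntq_u8 counts bits per byte and the
// vpaddlq_* chains above widen those counts; combining with v_reduce_sum
// yields a scalar population count of the whole 128-bit register.
static inline unsigned popcount128(const v_uint8x16& a)
{
    return v_reduce_sum(v_popcount(a)); // sum of per-byte bit counts
}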

inline int v_signmask(const v_uint8x16& a)
{

@@ -1096,17 +1438,32 @@ inline int v_signmask(const v_int32x4& a)
{ return v_signmask(v_reinterpret_as_u32(a)); }
inline int v_signmask(const v_float32x4& a)
{ return v_signmask(v_reinterpret_as_u32(a)); }
#if CV_SIMD128_64F
inline int v_signmask(const v_uint64x2& a)
{
    int64x1_t m0 = vdup_n_s64(0);
    uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), vcombine_s64(m0, m0));
    return (int)vgetq_lane_u64(v0, 0) + ((int)vgetq_lane_u64(v0, 1) << 1);
}
inline int v_signmask(const v_int64x2& a)
{ return v_signmask(v_reinterpret_as_u64(a)); }
#if CV_SIMD128_64F
inline int v_signmask(const v_float64x2& a)
{ return v_signmask(v_reinterpret_as_u64(a)); }
#endif

inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); }
inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); }
inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); }
inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); }
inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); }
inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); }
inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); }
inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); }
inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); }
#if CV_SIMD128_64F
inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); }
#endif
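
// Sketch (assumed caller-side): v_signmask packs one bit per lane, so the
// first set bit found by v_scan_forward is the index of the first matching
// lane; -1 is returned here when nothing matches.
static inline int first_equal_u8(const v_uint8x16& a, const v_uint8x16& b)
{
    v_uint8x16 eq = (a == b); // 0xFF where lanes match
    return v_check_any(eq) ? v_scan_forward(eq) : -1;
}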

#define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \
inline bool v_check_all(const v_##_Tpvec& a) \
{ \

@@ -1124,9 +1481,17 @@ inline bool v_check_any(const v_##_Tpvec& a) \
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint32x4, u32, 31)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint64x2, u64, 63)
#endif

inline bool v_check_all(const v_uint64x2& a)
{
    uint64x2_t v0 = vshrq_n_u64(a.val, 63);
    return (vgetq_lane_u64(v0, 0) & vgetq_lane_u64(v0, 1)) == 1;
}
inline bool v_check_any(const v_uint64x2& a)
{
    uint64x2_t v0 = vshrq_n_u64(a.val, 63);
    return (vgetq_lane_u64(v0, 0) | vgetq_lane_u64(v0, 1)) != 0;
}

inline bool v_check_all(const v_int8x16& a)
{ return v_check_all(v_reinterpret_as_u8(a)); }

@@ -1146,13 +1511,13 @@ inline bool v_check_any(const v_int32x4& a)
inline bool v_check_any(const v_float32x4& a)
{ return v_check_any(v_reinterpret_as_u32(a)); }

#if CV_SIMD128_64F
inline bool v_check_all(const v_int64x2& a)
{ return v_check_all(v_reinterpret_as_u64(a)); }
inline bool v_check_all(const v_float64x2& a)
{ return v_check_all(v_reinterpret_as_u64(a)); }
inline bool v_check_any(const v_int64x2& a)
{ return v_check_any(v_reinterpret_as_u64(a)); }
#if CV_SIMD128_64F
inline bool v_check_all(const v_float64x2& a)
{ return v_check_all(v_reinterpret_as_u64(a)); }
inline bool v_check_any(const v_float64x2& a)
{ return v_check_any(v_reinterpret_as_u64(a)); }
#endif

@@ -1174,6 +1539,26 @@ OPENCV_HAL_IMPL_NEON_SELECT(v_float32x4, f32, u32)
OPENCV_HAL_IMPL_NEON_SELECT(v_float64x2, f64, u64)
#endif

#if CV_NEON_AARCH64
#define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \
inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
{ \
    b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \
    b1.val = vmovl_high_##suffix(a.val); \
} \
inline _Tpwvec v_expand_low(const _Tpvec& a) \
{ \
    return _Tpwvec(vmovl_##suffix(vget_low_##suffix(a.val))); \
} \
inline _Tpwvec v_expand_high(const _Tpvec& a) \
{ \
    return _Tpwvec(vmovl_high_##suffix(a.val)); \
} \
inline _Tpwvec v_load_expand(const _Tp* ptr) \
{ \
    return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \
}
#else
#define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \
inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
{ \

@@ -1192,6 +1577,7 @@ inline _Tpwvec v_load_expand(const _Tp* ptr) \
{ \
    return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \
}
#endif

OPENCV_HAL_IMPL_NEON_EXPAND(v_uint8x16, v_uint16x8, uchar, u8)
OPENCV_HAL_IMPL_NEON_EXPAND(v_int8x16, v_int16x8, schar, s8)

@@ -1216,7 +1602,7 @@ inline v_int32x4 v_load_expand_q(const schar* ptr)
    return v_int32x4(vmovl_s16(v1));
}

#if defined(__aarch64__)
#if defined(__aarch64__) || defined(_M_ARM64)
#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \
inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
{ \

@@ -1270,6 +1656,52 @@ OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32)
OPENCV_HAL_IMPL_NEON_UNPACKS(float64x2, f64)
#endif

inline v_uint8x16 v_reverse(const v_uint8x16 &a)
{
    uint8x16_t vec = vrev64q_u8(a.val);
    return v_uint8x16(vextq_u8(vec, vec, 8));
}

inline v_int8x16 v_reverse(const v_int8x16 &a)
{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }

inline v_uint16x8 v_reverse(const v_uint16x8 &a)
{
    uint16x8_t vec = vrev64q_u16(a.val);
    return v_uint16x8(vextq_u16(vec, vec, 4));
}

inline v_int16x8 v_reverse(const v_int16x8 &a)
{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }

inline v_uint32x4 v_reverse(const v_uint32x4 &a)
{
    uint32x4_t vec = vrev64q_u32(a.val);
    return v_uint32x4(vextq_u32(vec, vec, 2));
}

inline v_int32x4 v_reverse(const v_int32x4 &a)
{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }

inline v_float32x4 v_reverse(const v_float32x4 &a)
{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }

inline v_uint64x2 v_reverse(const v_uint64x2 &a)
{
    uint64x2_t vec = a.val;
    uint64x1_t vec_lo = vget_low_u64(vec);
    uint64x1_t vec_hi = vget_high_u64(vec);
    return v_uint64x2(vcombine_u64(vec_hi, vec_lo));
}

inline v_int64x2 v_reverse(const v_int64x2 &a)
{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }

#if CV_SIMD128_64F
inline v_float64x2 v_reverse(const v_float64x2 &a)
{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
#endif
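
// Sketch: vrev64q_* reverses within each 64-bit half and vextq_* then swaps
// the halves, so lane order is fully mirrored, e.g. for v_uint32x4:
//   v_reverse(v_uint32x4(0, 1, 2, 3)) -> lanes {3, 2, 1, 0}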

#define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \
template <int s> \
inline v_##_Tpvec v_extract(const v_##_Tpvec& a, const v_##_Tpvec& b) \

@@ -1290,6 +1722,38 @@ OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32)
OPENCV_HAL_IMPL_NEON_EXTRACT(float64x2, f64)
#endif

#define OPENCV_HAL_IMPL_NEON_EXTRACT_N(_Tpvec, _Tp, suffix) \
template<int i> inline _Tp v_extract_n(_Tpvec v) { return vgetq_lane_##suffix(v.val, i); }

OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint8x16, uchar, u8)
OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int8x16, schar, s8)
OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint16x8, ushort, u16)
OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int16x8, short, s16)
OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint32x4, uint, u32)
OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int32x4, int, s32)
OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_uint64x2, uint64, u64)
OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_int64x2, int64, s64)
OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_float32x4, float, f32)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_EXTRACT_N(v_float64x2, double, f64)
#endif

#define OPENCV_HAL_IMPL_NEON_BROADCAST(_Tpvec, _Tp, suffix) \
template<int i> inline _Tpvec v_broadcast_element(_Tpvec v) { _Tp t = v_extract_n<i>(v); return v_setall_##suffix(t); }

OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint8x16, uchar, u8)
OPENCV_HAL_IMPL_NEON_BROADCAST(v_int8x16, schar, s8)
OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint16x8, ushort, u16)
OPENCV_HAL_IMPL_NEON_BROADCAST(v_int16x8, short, s16)
OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint32x4, uint, u32)
OPENCV_HAL_IMPL_NEON_BROADCAST(v_int32x4, int, s32)
OPENCV_HAL_IMPL_NEON_BROADCAST(v_uint64x2, uint64, u64)
OPENCV_HAL_IMPL_NEON_BROADCAST(v_int64x2, int64, s64)
OPENCV_HAL_IMPL_NEON_BROADCAST(v_float32x4, float, f32)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_BROADCAST(v_float64x2, double, f64)
#endif

#if CV_SIMD128_64F
inline v_int32x4 v_round(const v_float32x4& a)
{

@@ -1570,6 +2034,10 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
{
    return v_float64x2(vcvt_f64_f32(vget_high_f32(a.val)));
}

inline v_float64x2 v_cvt_f64(const v_int64x2& a)
{ return v_float64x2(vcvtq_f64_s64(a.val)); }

#endif

////////////// Lookup table access ////////////////////

@@ -1732,10 +2200,12 @@ inline v_float32x4 v_lut(const float* tab, const int* idx)
}
inline v_float32x4 v_lut_pairs(const float* tab, const int* idx)
{
    typedef uint64 CV_DECL_ALIGNED(1) unaligned_uint64;

    uint64 CV_DECL_ALIGNED(32) elems[2] =
    {
        *(uint64*)(tab + idx[0]),
        *(uint64*)(tab + idx[1])
        *(unaligned_uint64*)(tab + idx[0]),
        *(unaligned_uint64*)(tab + idx[1])
    };
    return v_float32x4(vreinterpretq_f32_u64(vld1q_u64(elems)));
}

@@ -1924,16 +2394,6 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)

inline void v_cleanup() {}

//! @name Check SIMD support
//! @{
//! @brief Check CPU capability of SIMD operation
static inline bool hasSIMD128()
{
    return (CV_CPU_HAS_SUPPORT_NEON) ? true : false;
}

//! @}

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END

//! @endcond

@@ -57,6 +57,14 @@ namespace cv

//! @cond IGNORED

//
// Compilation troubleshooting:
// - MSVC: error C2719: 'a': formal parameter with requested alignment of 16 won't be aligned
//   Replace parameter declaration to const reference:
//   -v_int32x4 a
//   +const v_int32x4& a
//

CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN

///////// Types ////////////

@@ -67,7 +75,8 @@ struct v_uint8x16
    typedef __m128i vector_type;
    enum { nlanes = 16 };

    v_uint8x16() : val(_mm_setzero_si128()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_uint8x16() {}
    explicit v_uint8x16(__m128i v) : val(v) {}
    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)

@@ -77,6 +86,7 @@ struct v_uint8x16
                             (char)v8, (char)v9, (char)v10, (char)v11,
                             (char)v12, (char)v13, (char)v14, (char)v15);
    }

    uchar get0() const
    {
        return (uchar)_mm_cvtsi128_si32(val);

@@ -91,7 +101,8 @@ struct v_int8x16
    typedef __m128i vector_type;
    enum { nlanes = 16 };

    v_int8x16() : val(_mm_setzero_si128()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_int8x16() {}
    explicit v_int8x16(__m128i v) : val(v) {}
    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
              schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)

@@ -101,6 +112,7 @@ struct v_int8x16
                             (char)v8, (char)v9, (char)v10, (char)v11,
                             (char)v12, (char)v13, (char)v14, (char)v15);
    }

    schar get0() const
    {
        return (schar)_mm_cvtsi128_si32(val);

@@ -115,13 +127,15 @@ struct v_uint16x8
    typedef __m128i vector_type;
    enum { nlanes = 8 };

    v_uint16x8() : val(_mm_setzero_si128()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_uint16x8() {}
    explicit v_uint16x8(__m128i v) : val(v) {}
    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
    {
        val = _mm_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3,
                             (short)v4, (short)v5, (short)v6, (short)v7);
    }

    ushort get0() const
    {
        return (ushort)_mm_cvtsi128_si32(val);

@@ -136,13 +150,15 @@ struct v_int16x8
    typedef __m128i vector_type;
    enum { nlanes = 8 };

    v_int16x8() : val(_mm_setzero_si128()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_int16x8() {}
    explicit v_int16x8(__m128i v) : val(v) {}
    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
    {
        val = _mm_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3,
                             (short)v4, (short)v5, (short)v6, (short)v7);
    }

    short get0() const
    {
        return (short)_mm_cvtsi128_si32(val);

@@ -157,12 +173,14 @@ struct v_uint32x4
    typedef __m128i vector_type;
    enum { nlanes = 4 };

    v_uint32x4() : val(_mm_setzero_si128()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_uint32x4() {}
    explicit v_uint32x4(__m128i v) : val(v) {}
    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
    {
        val = _mm_setr_epi32((int)v0, (int)v1, (int)v2, (int)v3);
    }

    unsigned get0() const
    {
        return (unsigned)_mm_cvtsi128_si32(val);

@@ -177,12 +195,14 @@ struct v_int32x4
    typedef __m128i vector_type;
    enum { nlanes = 4 };

    v_int32x4() : val(_mm_setzero_si128()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_int32x4() {}
    explicit v_int32x4(__m128i v) : val(v) {}
    v_int32x4(int v0, int v1, int v2, int v3)
    {
        val = _mm_setr_epi32(v0, v1, v2, v3);
    }

    int get0() const
    {
        return _mm_cvtsi128_si32(val);

@@ -197,12 +217,14 @@ struct v_float32x4
    typedef __m128 vector_type;
    enum { nlanes = 4 };

    v_float32x4() : val(_mm_setzero_ps()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_float32x4() {}
    explicit v_float32x4(__m128 v) : val(v) {}
    v_float32x4(float v0, float v1, float v2, float v3)
    {
        val = _mm_setr_ps(v0, v1, v2, v3);
    }

    float get0() const
    {
        return _mm_cvtss_f32(val);

@@ -217,17 +239,23 @@ struct v_uint64x2
    typedef __m128i vector_type;
    enum { nlanes = 2 };

    v_uint64x2() : val(_mm_setzero_si128()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_uint64x2() {}
    explicit v_uint64x2(__m128i v) : val(v) {}
    v_uint64x2(uint64 v0, uint64 v1)
    {
        val = _mm_setr_epi32((int)v0, (int)(v0 >> 32), (int)v1, (int)(v1 >> 32));
    }

    uint64 get0() const
    {
    #if !defined(__x86_64__) && !defined(_M_X64)
        int a = _mm_cvtsi128_si32(val);
        int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32));
        return (unsigned)a | ((uint64)(unsigned)b << 32);
    #else
        return (uint64)_mm_cvtsi128_si64(val);
    #endif
    }

    __m128i val;

@@ -239,17 +267,23 @@ struct v_int64x2
    typedef __m128i vector_type;
    enum { nlanes = 2 };

    v_int64x2() : val(_mm_setzero_si128()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_int64x2() {}
    explicit v_int64x2(__m128i v) : val(v) {}
    v_int64x2(int64 v0, int64 v1)
    {
        val = _mm_setr_epi32((int)v0, (int)(v0 >> 32), (int)v1, (int)(v1 >> 32));
    }

    int64 get0() const
    {
    #if !defined(__x86_64__) && !defined(_M_X64)
        int a = _mm_cvtsi128_si32(val);
        int b = _mm_cvtsi128_si32(_mm_srli_epi64(val, 32));
        return (int64)((unsigned)a | ((uint64)(unsigned)b << 32));
    #else
        return _mm_cvtsi128_si64(val);
    #endif
    }

    __m128i val;

@@ -261,12 +295,14 @@ struct v_float64x2
    typedef __m128d vector_type;
    enum { nlanes = 2 };

    v_float64x2() : val(_mm_setzero_pd()) {}
    /* coverity[uninit_ctor]: suppress warning */
    v_float64x2() {}
    explicit v_float64x2(__m128d v) : val(v) {}
    v_float64x2(double v0, double v1)
    {
        val = _mm_setr_pd(v0, v1);
    }

    double get0() const
    {
        return _mm_cvtsd_f64(val);

@@ -302,8 +338,8 @@ inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(_mm_set1_##ssuffix((_Tps)
template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \
{ return _Tpvec(cast(a.val)); }

OPENCV_HAL_IMPL_SSE_INITVEC(v_uint8x16, uchar, u8, si128, epi8, char, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_SSE_INITVEC(v_int8x16, schar, s8, si128, epi8, char, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_SSE_INITVEC(v_uint8x16, uchar, u8, si128, epi8, schar, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_SSE_INITVEC(v_int8x16, schar, s8, si128, epi8, schar, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_SSE_INITVEC(v_uint16x8, ushort, u16, si128, epi16, short, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_SSE_INITVEC(v_int16x8, short, s16, si128, epi16, short, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_SSE_INITVEC(v_uint32x4, unsigned, u32, si128, epi32, int, OPENCV_HAL_NOP)
@@ -791,15 +827,195 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) { return v_int16x8(_mm_mulhi_epi16(a.val, b.val)); }
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) { return v_uint16x8(_mm_mulhi_epu16(a.val, b.val)); }

inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
{
    return v_int32x4(_mm_madd_epi16(a.val, b.val));
}
//////// Dot Product ////////

// 16 >> 32
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
{ return v_int32x4(_mm_madd_epi16(a.val, b.val)); }
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
{ return v_dotprod(a, b) + c; }

// 32 >> 64
inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b)
{
    return v_int32x4(_mm_add_epi32(_mm_madd_epi16(a.val, b.val), c.val));
#if CV_SSE4_1
    __m128i even = _mm_mul_epi32(a.val, b.val);
    __m128i odd = _mm_mul_epi32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32));
    return v_int64x2(_mm_add_epi64(even, odd));
#else
    __m128i even_u = _mm_mul_epu32(a.val, b.val);
    __m128i odd_u = _mm_mul_epu32(_mm_srli_epi64(a.val, 32), _mm_srli_epi64(b.val, 32));
    // convert unsigned to signed high multiplication (from: Agner Fog(veclib) and H S Warren: Hacker's delight, 2003, p. 132)
    __m128i a_sign = _mm_srai_epi32(a.val, 31);
    __m128i b_sign = _mm_srai_epi32(b.val, 31);
    // |x * sign of x
    __m128i axb = _mm_and_si128(a.val, b_sign);
    __m128i bxa = _mm_and_si128(b.val, a_sign);
    // sum of sign corrections
    __m128i ssum = _mm_add_epi32(bxa, axb);
    __m128i even_ssum = _mm_slli_epi64(ssum, 32);
    __m128i odd_ssum = _mm_and_si128(ssum, _mm_set_epi32(-1, 0, -1, 0));
    // convert to signed and prod
    return v_int64x2(_mm_add_epi64(_mm_sub_epi64(even_u, even_ssum), _mm_sub_epi64(odd_u, odd_ssum)));
#endif
}
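// Why the SSE2 fallback above works (sketch): a signed 32-bit lane relates to
// its unsigned bit pattern as a_s = a_u - 2^32 * (a < 0), so modulo 2^64:
//   a_s * b_s = a_u * b_u - 2^32 * (a_u * (b < 0) + b_u * (a < 0)).
// ssum collects those per-lane sign terms; the shift/mask places them under
// the even/odd unsigned 64-bit products before the final subtraction.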
inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
{ return v_dotprod(a, b) + c; }

// 8 >> 32
inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b)
{
    __m128i a0 = _mm_srli_epi16(_mm_slli_si128(a.val, 1), 8); // even
    __m128i a1 = _mm_srli_epi16(a.val, 8); // odd
    __m128i b0 = _mm_srli_epi16(_mm_slli_si128(b.val, 1), 8);
    __m128i b1 = _mm_srli_epi16(b.val, 8);
    __m128i p0 = _mm_madd_epi16(a0, b0);
    __m128i p1 = _mm_madd_epi16(a1, b1);
    return v_uint32x4(_mm_add_epi32(p0, p1));
}
inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
{ return v_dotprod_expand(a, b) + c; }

inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b)
{
    __m128i a0 = _mm_srai_epi16(_mm_slli_si128(a.val, 1), 8); // even
    __m128i a1 = _mm_srai_epi16(a.val, 8); // odd
    __m128i b0 = _mm_srai_epi16(_mm_slli_si128(b.val, 1), 8);
    __m128i b1 = _mm_srai_epi16(b.val, 8);
    __m128i p0 = _mm_madd_epi16(a0, b0);
    __m128i p1 = _mm_madd_epi16(a1, b1);
    return v_int32x4(_mm_add_epi32(p0, p1));
}
inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
{ return v_dotprod_expand(a, b) + c; }

// 16 >> 64
inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b)
{
    v_uint32x4 c, d;
    v_mul_expand(a, b, c, d);

    v_uint64x2 c0, c1, d0, d1;
    v_expand(c, c0, c1);
    v_expand(d, d0, d1);

    c0 += c1; d0 += d1;
    return v_uint64x2(_mm_add_epi64(
        _mm_unpacklo_epi64(c0.val, d0.val),
        _mm_unpackhi_epi64(c0.val, d0.val)
    ));
}
inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
{ return v_dotprod_expand(a, b) + c; }

inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b)
{
    v_int32x4 prod = v_dotprod(a, b);
    v_int64x2 c, d;
    v_expand(prod, c, d);
    return v_int64x2(_mm_add_epi64(
        _mm_unpacklo_epi64(c.val, d.val),
        _mm_unpackhi_epi64(c.val, d.val)
    ));
}
inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
{ return v_dotprod_expand(a, b) + c; }

// 32 >> 64f
inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b)
{
#if CV_SSE4_1
    return v_cvt_f64(v_dotprod(a, b));
#else
    v_float64x2 c = v_cvt_f64(a) * v_cvt_f64(b);
    v_float64x2 d = v_cvt_f64_high(a) * v_cvt_f64_high(b);

    return v_float64x2(_mm_add_pd(
        _mm_unpacklo_pd(c.val, d.val),
        _mm_unpackhi_pd(c.val, d.val)
    ));
#endif
}
inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
{ return v_dotprod_expand(a, b) + c; }

//////// Fast Dot Product ////////

// 16 >> 32
inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b)
{ return v_dotprod(a, b); }
inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
{ return v_dotprod(a, b) + c; }

// 32 >> 64
inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b)
{ return v_dotprod(a, b); }
inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
{ return v_dotprod_fast(a, b) + c; }

// 8 >> 32
inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b)
{
    __m128i a0 = v_expand_low(a).val;
    __m128i a1 = v_expand_high(a).val;
    __m128i b0 = v_expand_low(b).val;
    __m128i b1 = v_expand_high(b).val;
    __m128i p0 = _mm_madd_epi16(a0, b0);
    __m128i p1 = _mm_madd_epi16(a1, b1);
    return v_uint32x4(_mm_add_epi32(p0, p1));
}
inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
{ return v_dotprod_expand_fast(a, b) + c; }

inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b)
{
#if CV_SSE4_1
    __m128i a0 = _mm_cvtepi8_epi16(a.val);
    __m128i a1 = v_expand_high(a).val;
    __m128i b0 = _mm_cvtepi8_epi16(b.val);
    __m128i b1 = v_expand_high(b).val;
    __m128i p0 = _mm_madd_epi16(a0, b0);
    __m128i p1 = _mm_madd_epi16(a1, b1);
    return v_int32x4(_mm_add_epi32(p0, p1));
#else
    return v_dotprod_expand(a, b);
#endif
}
inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
{ return v_dotprod_expand_fast(a, b) + c; }

// 16 >> 64
inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b)
{
    v_uint32x4 c, d;
    v_mul_expand(a, b, c, d);

    v_uint64x2 c0, c1, d0, d1;
    v_expand(c, c0, c1);
    v_expand(d, d0, d1);

    c0 += c1; d0 += d1;
    return c0 + d0;
}
inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
{ return v_dotprod_expand_fast(a, b) + c; }

inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b)
{
    v_int32x4 prod = v_dotprod(a, b);
    v_int64x2 c, d;
    v_expand(prod, c, d);
    return c + d;
}
inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
{ return v_dotprod_expand_fast(a, b) + c; }

// 32 >> 64f
v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c);
inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b)
{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); }
inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); }

#define OPENCV_HAL_IMPL_SSE_LOGIC_OP(_Tpvec, suffix, not_const) \
OPENCV_HAL_IMPL_SSE_BIN_OP(&, _Tpvec, _mm_and_##suffix) \
@@ -1032,14 +1248,23 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps)
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd)

#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec, cast) \
#if CV_SSE4_1
#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \
{ return _Tpvec(_mm_cmpeq_epi64(a.val, b.val)); } \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); }
{ return ~(a == b); }
#else
#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ __m128i cmp = _mm_cmpeq_epi32(a.val, b.val); \
  return _Tpvec(_mm_and_si128(cmp, _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 3, 0, 1)))); } \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return ~(a == b); }
#endif

OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64)
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64)
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2)
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2)

inline v_float32x4 v_not_nan(const v_float32x4& a)
{ return v_float32x4(_mm_cmpord_ps(a.val, a.val)); }

@@ -1393,6 +1618,41 @@ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float32x4, float, ps)
OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float64x2, double, pd)

inline unsigned v_reduce_sum(const v_uint8x16& a)
{
    __m128i half = _mm_sad_epu8(a.val, _mm_setzero_si128());
    return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half)));
}
inline int v_reduce_sum(const v_int8x16& a)
{
    __m128i half = _mm_set1_epi8((schar)-128);
    half = _mm_sad_epu8(_mm_xor_si128(a.val, half), _mm_setzero_si128());
    return _mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))) - 2048;
}
#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(func) \
inline schar v_reduce_##func(const v_int8x16& a) \
{ \
    __m128i val = a.val; \
    __m128i smask = _mm_set1_epi8((schar)-128); \
    val = _mm_xor_si128(val, smask); \
    val = _mm_##func##_epu8(val, _mm_srli_si128(val,8)); \
    val = _mm_##func##_epu8(val, _mm_srli_si128(val,4)); \
    val = _mm_##func##_epu8(val, _mm_srli_si128(val,2)); \
    val = _mm_##func##_epu8(val, _mm_srli_si128(val,1)); \
    return (schar)_mm_cvtsi128_si32(val) ^ (schar)-128; \
} \
inline uchar v_reduce_##func(const v_uint8x16& a) \
{ \
    __m128i val = a.val; \
    val = _mm_##func##_epu8(val, _mm_srli_si128(val,8)); \
    val = _mm_##func##_epu8(val, _mm_srli_si128(val,4)); \
    val = _mm_##func##_epu8(val, _mm_srli_si128(val,2)); \
    val = _mm_##func##_epu8(val, _mm_srli_si128(val,1)); \
    return (uchar)_mm_cvtsi128_si32(val); \
}
OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(max)
OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(min)

#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(_Tpvec, scalartype, func, suffix, sbit) \
inline scalartype v_reduce_##func(const v_##_Tpvec& a) \
{ \

@@ -1412,26 +1672,8 @@ inline unsigned scalartype v_reduce_##func(const v_u##_Tpvec& a) \
    val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \
    return (unsigned scalartype)(_mm_cvtsi128_si32(val) ^ sbit); \
}
#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8_SUM(_Tpvec, scalartype, suffix) \
inline scalartype v_reduce_sum(const v_##_Tpvec& a) \
{ \
    __m128i val = a.val; \
    val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 8)); \
    val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 4)); \
    val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 2)); \
    return (scalartype)_mm_cvtsi128_si32(val); \
} \
inline unsigned scalartype v_reduce_sum(const v_u##_Tpvec& a) \
{ \
    __m128i val = a.val; \
    val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 8)); \
    val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 4)); \
    val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 2)); \
    return (unsigned scalartype)_mm_cvtsi128_si32(val); \
}
OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, max, epi16, (short)-32768)
OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, min, epi16, (short)-32768)
OPENCV_HAL_IMPL_SSE_REDUCE_OP_8_SUM(int16x8, short, 16)

#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, cast_from, cast_to, extract) \
inline scalartype v_reduce_sum(const _Tpvec& a) \

@@ -1456,6 +1698,23 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_uint32x4, unsigned, __m128i, epi32, OPENCV
OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_int32x4, int, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32)
OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_si128, _mm_castsi128_ps, ss_f32)

inline int v_reduce_sum(const v_int16x8& a)
{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); }
inline unsigned v_reduce_sum(const v_uint16x8& a)
{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); }

inline uint64 v_reduce_sum(const v_uint64x2& a)
{
    uint64 CV_DECL_ALIGNED(32) idx[2];
    v_store_aligned(idx, a);
    return idx[0] + idx[1];
}
inline int64 v_reduce_sum(const v_int64x2& a)
{
    int64 CV_DECL_ALIGNED(32) idx[2];
    v_store_aligned(idx, a);
    return idx[0] + idx[1];
}
inline double v_reduce_sum(const v_float64x2& a)
{
    double CV_DECL_ALIGNED(32) idx[2];

@@ -1486,13 +1745,14 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min)

inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
{
    return (unsigned)_mm_cvtsi128_si32(_mm_sad_epu8(a.val, b.val));
    __m128i half = _mm_sad_epu8(a.val, b.val);
    return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half)));
}
inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
{
    __m128i half = _mm_set1_epi8(0x7f);
    return (unsigned)_mm_cvtsi128_si32(_mm_sad_epu8(_mm_add_epi8(a.val, half),
                                                    _mm_add_epi8(b.val, half)));
    half = _mm_sad_epu8(_mm_add_epi8(a.val, half), _mm_add_epi8(b.val, half));
    return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half)));
}
inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
{

@@ -1519,53 +1779,73 @@ inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
    return v_reduce_sum(v_absdiff(a, b));
}
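
// Usage sketch (assumed caller-side helper): mean absolute difference of two
// float quads via the SAD reduction above.
static inline float mean_abs_diff4(const v_float32x4& a, const v_float32x4& b)
{
    return v_reduce_sad(a, b) / 4.0f; // v_float32x4 has 4 lanes
}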

#define OPENCV_HAL_IMPL_SSE_POPCOUNT(_Tpvec) \
inline v_uint32x4 v_popcount(const _Tpvec& a) \
{ \
    __m128i m1 = _mm_set1_epi32(0x55555555); \
    __m128i m2 = _mm_set1_epi32(0x33333333); \
    __m128i m4 = _mm_set1_epi32(0x0f0f0f0f); \
    __m128i p = a.val; \
    p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 1), m1), _mm_and_si128(p, m1)); \
    p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 2), m2), _mm_and_si128(p, m2)); \
    p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 4), m4), _mm_and_si128(p, m4)); \
    p = _mm_adds_epi8(p, _mm_srli_si128(p, 1)); \
    p = _mm_adds_epi8(p, _mm_srli_si128(p, 2)); \
    return v_uint32x4(_mm_and_si128(p, _mm_set1_epi32(0x000000ff))); \
}

OPENCV_HAL_IMPL_SSE_POPCOUNT(v_uint8x16)
OPENCV_HAL_IMPL_SSE_POPCOUNT(v_uint16x8)
OPENCV_HAL_IMPL_SSE_POPCOUNT(v_uint32x4)
OPENCV_HAL_IMPL_SSE_POPCOUNT(v_int8x16)
OPENCV_HAL_IMPL_SSE_POPCOUNT(v_int16x8)
OPENCV_HAL_IMPL_SSE_POPCOUNT(v_int32x4)

#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, pack_op, and_op, signmask, allmask) \
inline int v_signmask(const _Tpvec& a) \
{ \
    return and_op(_mm_movemask_##suffix(pack_op(a.val)), signmask); \
} \
inline bool v_check_all(const _Tpvec& a) \
{ return and_op(_mm_movemask_##suffix(a.val), allmask) == allmask; } \
inline bool v_check_any(const _Tpvec& a) \
{ return and_op(_mm_movemask_##suffix(a.val), allmask) != 0; }

#define OPENCV_HAL_PACKS(a) _mm_packs_epi16(a, a)
inline __m128i v_packq_epi32(__m128i a)
inline v_uint8x16 v_popcount(const v_uint8x16& a)
{
    __m128i b = _mm_packs_epi32(a, a);
    return _mm_packs_epi16(b, b);
    __m128i m1 = _mm_set1_epi32(0x55555555);
    __m128i m2 = _mm_set1_epi32(0x33333333);
    __m128i m4 = _mm_set1_epi32(0x0f0f0f0f);
    __m128i p = a.val;
    p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 1), m1), _mm_and_si128(p, m1));
    p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 2), m2), _mm_and_si128(p, m2));
    p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 4), m4), _mm_and_si128(p, m4));
    return v_uint8x16(p);
}
inline v_uint16x8 v_popcount(const v_uint16x8& a)
{
    v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a));
    p += v_rotate_right<1>(p);
    return v_reinterpret_as_u16(p) & v_setall_u16(0x00ff);
}
inline v_uint32x4 v_popcount(const v_uint32x4& a)
{
    v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a));
    p += v_rotate_right<1>(p);
    p += v_rotate_right<2>(p);
    return v_reinterpret_as_u32(p) & v_setall_u32(0x000000ff);
}
inline v_uint64x2 v_popcount(const v_uint64x2& a)
{
    return v_uint64x2(_mm_sad_epu8(v_popcount(v_reinterpret_as_u8(a)).val, _mm_setzero_si128()));
}
inline v_uint8x16 v_popcount(const v_int8x16& a)
{ return v_popcount(v_reinterpret_as_u8(a)); }
inline v_uint16x8 v_popcount(const v_int16x8& a)
{ return v_popcount(v_reinterpret_as_u16(a)); }
inline v_uint32x4 v_popcount(const v_int32x4& a)
{ return v_popcount(v_reinterpret_as_u32(a)); }
inline v_uint64x2 v_popcount(const v_int64x2& a)
{ return v_popcount(v_reinterpret_as_u64(a)); }
|
||||
|
||||
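All of these v_popcount variants reduce to the same divide-and-conquer bit count driven by the 0x55/0x33/0x0f masks. A scalar model of one byte (a sketch, not part of the OpenCV API):

    // Classic SWAR population count: fold 1-bit fields into 2-bit sums,
    // then 2-bit into 4-bit, then 4-bit into the final 8-bit count.
    static inline unsigned popcount8_ref(unsigned p)
    {
        p = (p & 0x55) + ((p >> 1) & 0x55); // 0x55 = 01010101b
        p = (p & 0x33) + ((p >> 2) & 0x33); // 0x33 = 00110011b
        p = (p & 0x0f) + ((p >> 4) & 0x0f); // 0x0f = 00001111b
        return p;
    }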
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 65535, 65535)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int16x8, epi8, OPENCV_HAL_PACKS, OPENCV_HAL_AND, 255, (int)0xaaaa)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, epi8, v_packq_epi32, OPENCV_HAL_AND, 15, (int)0x8888)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 15, 15)
-OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, OPENCV_HAL_1ST, 3, 3)
+#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, cast_op, allmask) \
+inline int v_signmask(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)); } \
+inline bool v_check_all(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) == allmask; } \
+inline bool v_check_any(const _Tpvec& a) { return _mm_movemask_##suffix(cast_op(a.val)) != 0; }
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint8x16, epi8, OPENCV_HAL_NOP, 65535)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int8x16, epi8, OPENCV_HAL_NOP, 65535)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint32x4, ps, _mm_castsi128_ps, 15)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int32x4, ps, _mm_castsi128_ps, 15)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_uint64x2, pd, _mm_castsi128_pd, 3)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_int64x2, pd, _mm_castsi128_pd, 3)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float32x4, ps, OPENCV_HAL_NOP, 15)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(v_float64x2, pd, OPENCV_HAL_NOP, 3)
+
+#define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(_Tpvec) \
+inline int v_signmask(const _Tpvec& a) { return _mm_movemask_epi8(_mm_packs_epi16(a.val, a.val)) & 255; } \
+inline bool v_check_all(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) == 0xaaaa; } \
+inline bool v_check_any(const _Tpvec& a) { return (_mm_movemask_epi8(a.val) & 0xaaaa) != 0; }
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_uint16x8)
+OPENCV_HAL_IMPL_SSE_CHECK_SIGNS_SHORT(v_int16x8)
+
+inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
+inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))); }
+inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; }
+inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 2; }
+inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; }
+inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; }
+inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 4; }
+inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }
+inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }
+inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(v_reinterpret_as_s8(a))) / 8; }
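v_scan_forward returns the index of the first lane whose sign/mask bit is set. Because the signmask above is taken per byte, dividing the first set byte position by the lane width yields the lane index; a scalar sketch of that logic:

    // trailingZeros32 stands in for OpenCV's helper of the same name.
    static inline int scan_forward_ref(int byte_mask, int lane_bytes)
    {
        int tz = 0;
        while (tz < 16 && !(byte_mask & (1 << tz)))
            ++tz;                    // count trailing zero bits
        return tz / lane_bytes;      // first set byte -> lane index
    }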
 #if CV_SSE4_1
 #define OPENCV_HAL_IMPL_SSE_SELECT(_Tpvec, cast_ret, cast, suffix) \

@@ -1671,6 +1951,59 @@ OPENCV_HAL_IMPL_SSE_UNPACKS(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
 OPENCV_HAL_IMPL_SSE_UNPACKS(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps)
 OPENCV_HAL_IMPL_SSE_UNPACKS(v_float64x2, pd, _mm_castpd_si128, _mm_castsi128_pd)
 
+inline v_uint8x16 v_reverse(const v_uint8x16 &a)
+{
+#if CV_SSSE3
+    static const __m128i perm = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+    return v_uint8x16(_mm_shuffle_epi8(a.val, perm));
+#else
+    uchar CV_DECL_ALIGNED(32) d[16];
+    v_store_aligned(d, a);
+    return v_uint8x16(d[15], d[14], d[13], d[12], d[11], d[10], d[9], d[8], d[7], d[6], d[5], d[4], d[3], d[2], d[1], d[0]);
+#endif
+}
+
+inline v_int8x16 v_reverse(const v_int8x16 &a)
+{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
+
+inline v_uint16x8 v_reverse(const v_uint16x8 &a)
+{
+#if CV_SSSE3
+    static const __m128i perm = _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
+    return v_uint16x8(_mm_shuffle_epi8(a.val, perm));
+#else
+    __m128i r = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3));
+    r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(2, 3, 0, 1));
+    r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(2, 3, 0, 1));
+    return v_uint16x8(r);
+#endif
+}
+
+inline v_int16x8 v_reverse(const v_int16x8 &a)
+{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
+
+inline v_uint32x4 v_reverse(const v_uint32x4 &a)
+{
+    return v_uint32x4(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3)));
+}
+
+inline v_int32x4 v_reverse(const v_int32x4 &a)
+{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_float32x4 v_reverse(const v_float32x4 &a)
+{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_uint64x2 v_reverse(const v_uint64x2 &a)
+{
+    return v_uint64x2(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(1, 0, 3, 2)));
+}
+
+inline v_int64x2 v_reverse(const v_int64x2 &a)
+{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
+
+inline v_float64x2 v_reverse(const v_float64x2 &a)
+{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
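Note that v_reverse reverses whole lanes, not bytes, so for multi-byte lanes the per-lane byte order is untouched. Illustrative values:

    // v_uint16x8 v(0, 1, 2, 3, 4, 5, 6, 7);
    // v_reverse(v) == v_uint16x8(7, 6, 5, 4, 3, 2, 1, 0)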
 template<int s, typename _Tpvec>
 inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
 {

@@ -2684,18 +3017,31 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
     return v_float64x2(_mm_cvtps_pd(_mm_movehl_ps(a.val, a.val)));
 }
 
-#if CV_FP16
-inline v_float32x4 v128_load_fp16_f32(const short* ptr)
+// from (Mysticial and wim) https://stackoverflow.com/q/41144668
+inline v_float64x2 v_cvt_f64(const v_int64x2& v)
 {
-    return v_float32x4(_mm_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
-}
-
-inline void v_store_fp16(short* ptr, const v_float32x4& a)
-{
-    __m128i fp16_value = _mm_cvtps_ph(a.val, 0);
-    _mm_storel_epi64((__m128i*)ptr, fp16_value);
-}
+    // constants encoded as floating-point
+    __m128i magic_i_hi32 = _mm_set1_epi64x(0x4530000080000000); // 2^84 + 2^63
+    __m128i magic_i_all  = _mm_set1_epi64x(0x4530000080100000); // 2^84 + 2^63 + 2^52
+    __m128d magic_d_all  = _mm_castsi128_pd(magic_i_all);
+    // Blend the 32 lowest significant bits of v with magic_int_lo
+#if CV_SSE4_1
+    __m128i magic_i_lo = _mm_set1_epi64x(0x4330000000000000); // 2^52
+    __m128i v_lo = _mm_blend_epi16(v.val, magic_i_lo, 0xcc);
+#else
+    __m128i magic_i_lo = _mm_set1_epi32(0x43300000); // 2^52
+    __m128i v_lo = _mm_unpacklo_epi32(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(0, 0, 2, 0)), magic_i_lo);
+#endif
+    // Extract the 32 most significant bits of v
+    __m128i v_hi = _mm_srli_epi64(v.val, 32);
+    // Flip the msb of v_hi and blend with 0x45300000
+    v_hi = _mm_xor_si128(v_hi, magic_i_hi32);
+    // Compute in double precision
+    __m128d v_hi_dbl = _mm_sub_pd(_mm_castsi128_pd(v_hi), magic_d_all);
+    // (v_hi - magic_d_all) + v_lo  -- do not assume associativity of floating point addition
+    __m128d result = _mm_add_pd(v_hi_dbl, _mm_castsi128_pd(v_lo));
+    return v_float64x2(result);
+}
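SSE2 has no packed int64-to-double conversion, so the routine above builds doubles out of IEEE-754 bit patterns: the low 32 bits ride on 2^52, the sign-flipped high 32 bits on 2^84 + 2^63, and the magic offsets cancel in double arithmetic. A scalar model of the same trick, with the same no-reassociation caveat (helper name is illustrative):

    #include <cstdint>
    #include <cstring>
    static inline double cvt_i64_f64_ref(int64_t v)
    {
        const uint64_t u = (uint64_t)v;
        uint64_t lo_bits  = 0x4330000000000000ull | (u & 0xffffffffull); // 2^52 + lo
        uint64_t hi_bits  = 0x4530000080000000ull ^ (u >> 32);           // 2^84 + 2^63, hi with flipped msb
        uint64_t all_bits = 0x4530000080100000ull;                       // 2^84 + 2^63 + 2^52
        double lo, hi, all;
        std::memcpy(&lo, &lo_bits, 8);
        std::memcpy(&hi, &hi_bits, 8);
        std::memcpy(&all, &all_bits, 8);
        return (hi - all) + lo; // keep this association, exactly as the comment warns
    }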
 ////////////// Lookup table access ////////////////////

@@ -2952,10 +3298,107 @@ inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
 inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
 inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
 
+template<int i>
+inline uchar v_extract_n(const v_uint8x16& v)
+{
+#if CV_SSE4_1
+    return (uchar)_mm_extract_epi8(v.val, i);
+#else
+    return v_rotate_right<i>(v).get0();
+#endif
+}
+
+template<int i>
+inline schar v_extract_n(const v_int8x16& v)
+{
+    return (schar)v_extract_n<i>(v_reinterpret_as_u8(v));
+}
+
+template<int i>
+inline ushort v_extract_n(const v_uint16x8& v)
+{
+    return (ushort)_mm_extract_epi16(v.val, i);
+}
+
+template<int i>
+inline short v_extract_n(const v_int16x8& v)
+{
+    return (short)v_extract_n<i>(v_reinterpret_as_u16(v));
+}
+
+template<int i>
+inline uint v_extract_n(const v_uint32x4& v)
+{
+#if CV_SSE4_1
+    return (uint)_mm_extract_epi32(v.val, i);
+#else
+    return v_rotate_right<i>(v).get0();
+#endif
+}
+
+template<int i>
+inline int v_extract_n(const v_int32x4& v)
+{
+    return (int)v_extract_n<i>(v_reinterpret_as_u32(v));
+}
+
+template<int i>
+inline uint64 v_extract_n(const v_uint64x2& v)
+{
+#ifdef CV__SIMD_NATIVE_mm_extract_epi64
+    return (uint64)_v128_extract_epi64<i>(v.val);
+#else
+    return v_rotate_right<i>(v).get0();
+#endif
+}
+
+template<int i>
+inline int64 v_extract_n(const v_int64x2& v)
+{
+    return (int64)v_extract_n<i>(v_reinterpret_as_u64(v));
+}
+
+template<int i>
+inline float v_extract_n(const v_float32x4& v)
+{
+    union { uint iv; float fv; } d;
+    d.iv = v_extract_n<i>(v_reinterpret_as_u32(v));
+    return d.fv;
+}
+
+template<int i>
+inline double v_extract_n(const v_float64x2& v)
+{
+    union { uint64 iv; double dv; } d;
+    d.iv = v_extract_n<i>(v_reinterpret_as_u64(v));
+    return d.dv;
+}
+
+template<int i>
+inline v_int32x4 v_broadcast_element(const v_int32x4& v)
+{
+    return v_int32x4(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(i,i,i,i)));
+}
+
+template<int i>
+inline v_uint32x4 v_broadcast_element(const v_uint32x4& v)
+{
+    return v_uint32x4(_mm_shuffle_epi32(v.val, _MM_SHUFFLE(i,i,i,i)));
+}
+
+template<int i>
+inline v_float32x4 v_broadcast_element(const v_float32x4& v)
+{
+    return v_float32x4(_mm_shuffle_ps(v.val, v.val, _MM_SHUFFLE((char)i,(char)i,(char)i,(char)i)));
+}
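Possible usage of these compile-time lane accessors (illustrative values):

    // v_int32x4 v(10, 20, 30, 40);
    // int x = v_extract_n<2>(v);               // 30; the index must be a constant
    // v_int32x4 b = v_broadcast_element<1>(v); // (20, 20, 20, 20)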
 ////////////// FP16 support ///////////////////////////
 
 inline v_float32x4 v_load_expand(const float16_t* ptr)
 {
+#if CV_FP16
+    return v_float32x4(_mm_cvtph_ps(_mm_loadu_si128((const __m128i*)ptr)));
+#else
     const __m128i z = _mm_setzero_si128(), delta = _mm_set1_epi32(0x38000000);
     const __m128i signmask = _mm_set1_epi32(0x80000000), maxexp = _mm_set1_epi32(0x7c000000);
     const __m128 deltaf = _mm_castsi128_ps(_mm_set1_epi32(0x38800000));

@@ -2968,10 +3411,15 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
     __m128i zmask = _mm_cmpeq_epi32(e, z);
     __m128i ft = v_select_si128(zmask, zt, t);
     return v_float32x4(_mm_castsi128_ps(_mm_or_si128(ft, sign)));
+#endif
 }
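The software fallback decodes IEEE half floats by rebiasing the exponent, with separate masked handling for zeros/subnormals and Inf/NaN. A plain scalar reference of what it computes (a sketch, not the vector code):

    #include <cmath>
    #include <cstdint>
    static inline float fp16_to_f32_ref(uint16_t h)
    {
        int sign = (h >> 15) & 1;
        int exp  = (h >> 10) & 0x1f;
        int frac =  h        & 0x3ff;
        float v;
        if (exp == 0)       v = std::ldexp((float)frac, -24);                // zero / subnormal
        else if (exp == 31) v = frac ? NAN : INFINITY;                       // Inf / NaN
        else                v = std::ldexp((float)(frac + 1024), exp - 25);  // 1.frac * 2^(exp-15)
        return sign ? -v : v;
    }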
 inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
 {
+#if CV_FP16
+    __m128i fp16_value = _mm_cvtps_ph(v.val, 0);
+    _mm_storel_epi64((__m128i*)ptr, fp16_value);
+#else
     const __m128i signmask = _mm_set1_epi32(0x80000000);
     const __m128i rval = _mm_set1_epi32(0x3f000000);

@@ -2993,20 +3441,11 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
     t = _mm_or_si128(t, sign);
     t = _mm_packs_epi32(t, t);
     _mm_storel_epi64((__m128i*)ptr, t);
+#endif
 }
 
 inline void v_cleanup() {}
 
-//! @name Check SIMD support
-//! @{
-//! @brief Check CPU capability of SIMD operation
-static inline bool hasSIMD128()
-{
-    return (CV_CPU_HAS_SUPPORT_SSE2) ? true : false;
-}
-
-//! @}
 
 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
 
 //! @endcond
@@ -158,10 +158,23 @@ inline __m128i _v128_packs_epu32(const __m128i& a, const __m128i& b)
 #endif
 }
 
+template<int i>
+inline int64 _v128_extract_epi64(const __m128i& a)
+{
+#if defined(CV__SIMD_HAVE_mm_extract_epi64) || (CV_SSE4_1 && (defined(__x86_64__)/*GCC*/ || defined(_M_X64)/*MSVC*/))
+#define CV__SIMD_NATIVE_mm_extract_epi64 1
+    return _mm_extract_epi64(a, i);
+#else
+    CV_DECL_ALIGNED(16) int64 tmp[2];
+    _mm_store_si128((__m128i*)tmp, a);
+    return tmp[i];
+#endif
+}
+
 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
 
 //! @endcond
 
 } // cv::
 
 #endif // OPENCV_HAL_INTRIN_SSE_EM_HPP
@@ -28,7 +28,7 @@ struct v_uint8x16
 
     explicit v_uint8x16(const vec_uchar16& v) : val(v)
     {}
-    v_uint8x16() : val(vec_uchar16_z)
+    v_uint8x16()
     {}
    v_uint8x16(vec_bchar16 v) : val(vec_uchar16_c(v))
     {}

@@ -36,6 +36,9 @@ struct v_uint8x16
                uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
         : val(vec_uchar16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
     {}
+
+    static inline v_uint8x16 zero() { return v_uint8x16(vec_uchar16_z); }
+
     uchar get0() const
     { return vec_extract(val, 0); }
 };

@@ -48,7 +51,7 @@ struct v_int8x16
 
     explicit v_int8x16(const vec_char16& v) : val(v)
     {}
-    v_int8x16() : val(vec_char16_z)
+    v_int8x16()
     {}
     v_int8x16(vec_bchar16 v) : val(vec_char16_c(v))
     {}

@@ -56,6 +59,9 @@ struct v_int8x16
                schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
         : val(vec_char16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
     {}
+
+    static inline v_int8x16 zero() { return v_int8x16(vec_char16_z); }
+
     schar get0() const
     { return vec_extract(val, 0); }
 };

@@ -68,13 +74,16 @@ struct v_uint16x8
 
     explicit v_uint16x8(const vec_ushort8& v) : val(v)
     {}
-    v_uint16x8() : val(vec_ushort8_z)
+    v_uint16x8()
     {}
     v_uint16x8(vec_bshort8 v) : val(vec_ushort8_c(v))
     {}
     v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
         : val(vec_ushort8_set(v0, v1, v2, v3, v4, v5, v6, v7))
     {}
+
+    static inline v_uint16x8 zero() { return v_uint16x8(vec_ushort8_z); }
+
     ushort get0() const
     { return vec_extract(val, 0); }
 };

@@ -87,13 +96,16 @@ struct v_int16x8
 
     explicit v_int16x8(const vec_short8& v) : val(v)
     {}
-    v_int16x8() : val(vec_short8_z)
+    v_int16x8()
     {}
     v_int16x8(vec_bshort8 v) : val(vec_short8_c(v))
     {}
     v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
         : val(vec_short8_set(v0, v1, v2, v3, v4, v5, v6, v7))
     {}
+
+    static inline v_int16x8 zero() { return v_int16x8(vec_short8_z); }
+
     short get0() const
     { return vec_extract(val, 0); }
 };

@@ -106,12 +118,15 @@ struct v_uint32x4
 
     explicit v_uint32x4(const vec_uint4& v) : val(v)
     {}
-    v_uint32x4() : val(vec_uint4_z)
+    v_uint32x4()
     {}
     v_uint32x4(vec_bint4 v) : val(vec_uint4_c(v))
     {}
     v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) : val(vec_uint4_set(v0, v1, v2, v3))
     {}
+
+    static inline v_uint32x4 zero() { return v_uint32x4(vec_uint4_z); }
+
     uint get0() const
     { return vec_extract(val, 0); }
 };

@@ -124,12 +139,15 @@ struct v_int32x4
 
     explicit v_int32x4(const vec_int4& v) : val(v)
     {}
-    v_int32x4() : val(vec_int4_z)
+    v_int32x4()
     {}
     v_int32x4(vec_bint4 v) : val(vec_int4_c(v))
     {}
     v_int32x4(int v0, int v1, int v2, int v3) : val(vec_int4_set(v0, v1, v2, v3))
     {}
+
+    static inline v_int32x4 zero() { return v_int32x4(vec_int4_z); }
+
     int get0() const
     { return vec_extract(val, 0); }
 };

@@ -142,12 +160,15 @@ struct v_float32x4
 
     explicit v_float32x4(const vec_float4& v) : val(v)
     {}
-    v_float32x4() : val(vec_float4_z)
+    v_float32x4()
     {}
     v_float32x4(vec_bint4 v) : val(vec_float4_c(v))
     {}
     v_float32x4(float v0, float v1, float v2, float v3) : val(vec_float4_set(v0, v1, v2, v3))
     {}
+
+    static inline v_float32x4 zero() { return v_float32x4(vec_float4_z); }
+
     float get0() const
     { return vec_extract(val, 0); }
 };

@@ -160,12 +181,15 @@ struct v_uint64x2
 
     explicit v_uint64x2(const vec_udword2& v) : val(v)
     {}
-    v_uint64x2() : val(vec_udword2_z)
+    v_uint64x2()
     {}
     v_uint64x2(vec_bdword2 v) : val(vec_udword2_c(v))
     {}
     v_uint64x2(uint64 v0, uint64 v1) : val(vec_udword2_set(v0, v1))
     {}
+
+    static inline v_uint64x2 zero() { return v_uint64x2(vec_udword2_z); }
+
     uint64 get0() const
     { return vec_extract(val, 0); }
 };

@@ -178,12 +202,15 @@ struct v_int64x2
 
     explicit v_int64x2(const vec_dword2& v) : val(v)
     {}
-    v_int64x2() : val(vec_dword2_z)
+    v_int64x2()
     {}
     v_int64x2(vec_bdword2 v) : val(vec_dword2_c(v))
     {}
     v_int64x2(int64 v0, int64 v1) : val(vec_dword2_set(v0, v1))
     {}
+
+    static inline v_int64x2 zero() { return v_int64x2(vec_dword2_z); }
+
     int64 get0() const
     { return vec_extract(val, 0); }
 };

@@ -196,16 +223,33 @@ struct v_float64x2
 
     explicit v_float64x2(const vec_double2& v) : val(v)
     {}
-    v_float64x2() : val(vec_double2_z)
+    v_float64x2()
     {}
     v_float64x2(vec_bdword2 v) : val(vec_double2_c(v))
     {}
     v_float64x2(double v0, double v1) : val(vec_double2_set(v0, v1))
     {}
+
+    static inline v_float64x2 zero() { return v_float64x2(vec_double2_z); }
+
     double get0() const
     { return vec_extract(val, 0); }
 };
 
+#define OPENCV_HAL_IMPL_VSX_EXTRACT_N(_Tpvec, _Tp) \
+template<int i> inline _Tp v_extract_n(VSX_UNUSED(_Tpvec v)) { return vec_extract(v.val, i); }
+
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint8x16, uchar)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int8x16, schar)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int16x8, short)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint32x4, uint)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int32x4, int)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_uint64x2, uint64)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_int64x2, int64)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_float32x4, float)
+OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_float64x2, double)
+
 //////////////// Load and store operations ///////////////
 
 /*

@@ -215,7 +259,7 @@ struct v_float64x2
 * if vec_xxx_c defined as C++ cast, clang-5 will pass it
 */
 #define OPENCV_HAL_IMPL_VSX_INITVEC(_Tpvec, _Tp, suffix, cast) \
-inline _Tpvec v_setzero_##suffix() { return _Tpvec(); } \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec(vec_splats((_Tp)0)); } \
 inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));} \
 template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a) \
 { return _Tpvec((cast)a.val); }
@@ -332,11 +376,37 @@ OPENCV_HAL_IMPL_VSX_EXPAND(v_int16x8, v_int32x4, short, vec_unpackl, vec_unpackh
 OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpackhu)
 OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh)
 
+/* Load and zero expand a 4 byte value into the second dword, first is don't care. */
+#if !defined(CV_COMPILER_VSX_BROKEN_ASM)
+    #define _LXSIWZX(out, ptr, T) __asm__ ("lxsiwzx %x0, 0, %1\r\n" : "=wa"(out) : "r" (ptr) : "memory");
+#else
+    /* This is compiler-agnostic, but will introduce an unneeded splat on the critical path. */
+    #define _LXSIWZX(out, ptr, T) out = (T)vec_udword2_sp(*(uint32_t*)(ptr));
+#endif
+
 inline v_uint32x4 v_load_expand_q(const uchar* ptr)
-{ return v_uint32x4(vec_uint4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }
+{
+    // Zero-extend the extra 24B instead of unpacking. Usually faster in small kernel
+    // Likewise note, value is zero extended and upper 4 bytes are zero'ed.
+    vec_uchar16 pmu = {8, 12, 12, 12, 9, 12, 12, 12, 10, 12, 12, 12, 11, 12, 12, 12};
+    vec_uchar16 out;
+
+    _LXSIWZX(out, ptr, vec_uchar16);
+    out = vec_perm(out, out, pmu);
+    return v_uint32x4((vec_uint4)out);
+}
 
 inline v_int32x4 v_load_expand_q(const schar* ptr)
-{ return v_int32x4(vec_int4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }
+{
+    vec_char16 out;
+    vec_short8 outs;
+    vec_int4 outw;
+
+    _LXSIWZX(out, ptr, vec_char16);
+    outs = vec_unpackl(out);
+    outw = vec_unpackh(outs);
+    return v_int32x4(outw);
+}
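Whichever load path is taken (the lxsiwzx asm form or the splat fallback), the observable result of v_load_expand_q is simply four widened lanes; a scalar reference of the contract:

    #include <cstdint>
    static inline void load_expand_q_u8_ref(const unsigned char* ptr, uint32_t out[4])
    {
        for (int i = 0; i < 4; ++i)
            out[i] = (uint32_t)ptr[i];   // zero-extend; the schar variant sign-extends
    }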
 /* pack */
 #define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack) \

@@ -499,12 +569,6 @@ inline void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d)
     v_zip(p0, p1, c, d);
 }
 
-inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, v_uint64x2& c, v_uint64x2& d)
-{
-    c.val = vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val));
-    d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
-}
-
 inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
 {
     vec_int4 p0 = vec_mule(a.val, b.val);

@@ -626,7 +690,7 @@ inline _Tpvec v_rotate_##suffix(const _Tpvec& a)
 { \
     const int wd = imm * sizeof(typename _Tpvec::lane_type); \
     if (wd > 15) \
-        return _Tpvec(); \
+        return _Tpvec::zero(); \
     return _Tpvec((cast)shf(vec_uchar16_c(a.val), vec_uchar16_sp(wd << 3))); \
 }

@@ -684,6 +748,53 @@ OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_float64x2)
 OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_uint64x2)
 OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_int64x2)
 
+/* Reverse */
+inline v_uint8x16 v_reverse(const v_uint8x16 &a)
+{
+    static const vec_uchar16 perm = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
+    vec_uchar16 vec = (vec_uchar16)a.val;
+    return v_uint8x16(vec_perm(vec, vec, perm));
+}
+
+inline v_int8x16 v_reverse(const v_int8x16 &a)
+{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
+
+inline v_uint16x8 v_reverse(const v_uint16x8 &a)
+{
+    static const vec_uchar16 perm = {14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1};
+    vec_uchar16 vec = (vec_uchar16)a.val;
+    return v_reinterpret_as_u16(v_uint8x16(vec_perm(vec, vec, perm)));
+}
+
+inline v_int16x8 v_reverse(const v_int16x8 &a)
+{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
+
+inline v_uint32x4 v_reverse(const v_uint32x4 &a)
+{
+    static const vec_uchar16 perm = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3};
+    vec_uchar16 vec = (vec_uchar16)a.val;
+    return v_reinterpret_as_u32(v_uint8x16(vec_perm(vec, vec, perm)));
+}
+
+inline v_int32x4 v_reverse(const v_int32x4 &a)
+{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_float32x4 v_reverse(const v_float32x4 &a)
+{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
+
+inline v_uint64x2 v_reverse(const v_uint64x2 &a)
+{
+    static const vec_uchar16 perm = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7};
+    vec_uchar16 vec = (vec_uchar16)a.val;
+    return v_reinterpret_as_u64(v_uint8x16(vec_perm(vec, vec, perm)));
+}
+
+inline v_int64x2 v_reverse(const v_int64x2 &a)
+{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
+
+inline v_float64x2 v_reverse(const v_float64x2 &a)
+{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
+
 /* Extract */
 template<int s, typename _Tpvec>
 inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)

@@ -692,15 +803,27 @@ inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
 ////////// Reduce and mask /////////
 
 /** Reduce **/
-inline short v_reduce_sum(const v_int16x8& a)
+inline uint v_reduce_sum(const v_uint8x16& a)
+{
+    const vec_uint4 zero4 = vec_uint4_z;
+    vec_uint4 sum4 = vec_sum4s(a.val, zero4);
+    return (uint)vec_extract(vec_sums(vec_int4_c(sum4), vec_int4_c(zero4)), 3);
+}
+inline int v_reduce_sum(const v_int8x16& a)
+{
+    const vec_int4 zero4 = vec_int4_z;
+    vec_int4 sum4 = vec_sum4s(a.val, zero4);
+    return (int)vec_extract(vec_sums(sum4, zero4), 3);
+}
+inline int v_reduce_sum(const v_int16x8& a)
 {
     const vec_int4 zero = vec_int4_z;
-    return saturate_cast<short>(vec_extract(vec_sums(vec_sum4s(a.val, zero), zero), 3));
+    return saturate_cast<int>(vec_extract(vec_sums(vec_sum4s(a.val, zero), zero), 3));
 }
-inline ushort v_reduce_sum(const v_uint16x8& a)
+inline uint v_reduce_sum(const v_uint16x8& a)
 {
     const vec_int4 v4 = vec_int4_c(vec_unpackhu(vec_adds(a.val, vec_sld(a.val, a.val, 8))));
-    return saturate_cast<ushort>(vec_extract(vec_sums(v4, vec_int4_z), 3));
+    return saturate_cast<uint>(vec_extract(vec_sums(v4, vec_int4_z), 3));
 }
 
 #define OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(_Tpvec, _Tpvec2, scalartype, suffix, func) \

@@ -719,6 +842,14 @@ OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add)
 OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max)
 OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min)
 
+inline uint64 v_reduce_sum(const v_uint64x2& a)
+{
+    return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0);
+}
+inline int64 v_reduce_sum(const v_int64x2& a)
+{
+    return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0);
+}
+inline double v_reduce_sum(const v_float64x2& a)
+{
+    return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0);

@@ -736,6 +867,19 @@ OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, min, vec_min)
 OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, max, vec_max)
 OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, min, vec_min)
 
+#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(_Tpvec, _Tpvec2, scalartype, suffix, func) \
+inline scalartype v_reduce_##suffix(const _Tpvec& a) \
+{ \
+    _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \
+    rs = func(rs, vec_sld(rs, rs, 4)); \
+    rs = func(rs, vec_sld(rs, rs, 2)); \
+    return vec_extract(func(rs, vec_sld(rs, rs, 1)), 0); \
+}
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_uint8x16, vec_uchar16, uchar, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_uint8x16, vec_uchar16, uchar, min, vec_min)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_int8x16, vec_char16, schar, max, vec_max)
+OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(v_int8x16, vec_char16, schar, min, vec_min)
+
 inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
                                  const v_float32x4& c, const v_float32x4& d)
 {

@@ -763,7 +907,7 @@ inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
 inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
 {
     vec_ushort8 ad = vec_absd(a.val, b.val);
-    VSX_UNUSED(vec_int4) sum = vec_sums(vec_int4_c(vec_unpackhu(ad)), vec_int4_c(vec_unpacklu(ad)));
+    VSX_UNUSED(vec_int4) sum = vec_sums(vec_int4_c(vec_unpackhu(ad)) + vec_int4_c(vec_unpacklu(ad)), vec_int4_z);
     return (unsigned)vec_extract(sum, 3);
 }
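The one-line change above is a real fix: vec_sums (vsumsws) adds all four words of its first operand but only element 3 of its second, so the old call silently dropped three of the unpacked partial sums. A scalar model of the builtin:

    #include <cstdint>
    static inline int32_t vec_sums_ref(const int32_t x[4], const int32_t y[4])
    {
        // result element 3 = x0 + x1 + x2 + x3 + y3 (saturated in hardware)
        return x[0] + x[1] + x[2] + x[3] + y[3];
    }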
 inline unsigned v_reduce_sad(const v_int16x8& a, const v_int16x8& b)

@@ -792,43 +936,44 @@ inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b)
 }
 
 /** Popcount **/
-template<typename _Tpvec>
-inline v_uint32x4 v_popcount(const _Tpvec& a)
-{ return v_uint32x4(vec_popcntu(vec_uint4_c(a.val))); }
+inline v_uint8x16 v_popcount(const v_uint8x16& a)
+{ return v_uint8x16(vec_popcntu(a.val)); }
+inline v_uint8x16 v_popcount(const v_int8x16& a)
+{ return v_uint8x16(vec_popcntu(a.val)); }
+inline v_uint16x8 v_popcount(const v_uint16x8& a)
+{ return v_uint16x8(vec_popcntu(a.val)); }
+inline v_uint16x8 v_popcount(const v_int16x8& a)
+{ return v_uint16x8(vec_popcntu(a.val)); }
+inline v_uint32x4 v_popcount(const v_uint32x4& a)
+{ return v_uint32x4(vec_popcntu(a.val)); }
+inline v_uint32x4 v_popcount(const v_int32x4& a)
+{ return v_uint32x4(vec_popcntu(a.val)); }
+inline v_uint64x2 v_popcount(const v_uint64x2& a)
+{ return v_uint64x2(vec_popcntu(a.val)); }
+inline v_uint64x2 v_popcount(const v_int64x2& a)
+{ return v_uint64x2(vec_popcntu(a.val)); }
 
 /** Mask **/
 inline int v_signmask(const v_uint8x16& a)
 {
-    vec_uchar16 sv = vec_sr(a.val, vec_uchar16_sp(7));
-    static const vec_uchar16 slm = {0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7};
-    sv = vec_sl(sv, slm);
-    vec_uint4 sv4 = vec_sum4s(sv, vec_uint4_z);
-    static const vec_uint4 slm4 = {0, 0, 8, 8};
-    sv4 = vec_sl(sv4, slm4);
-    return vec_extract(vec_sums((vec_int4) sv4, vec_int4_z), 3);
+    static const vec_uchar16 qperm = {120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0};
+    return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2);
 }
 inline int v_signmask(const v_int8x16& a)
 { return v_signmask(v_reinterpret_as_u8(a)); }
 
 inline int v_signmask(const v_int16x8& a)
 {
-    static const vec_ushort8 slm = {0, 1, 2, 3, 4, 5, 6, 7};
-    vec_short8 sv = vec_sr(a.val, vec_ushort8_sp(15));
-    sv = vec_sl(sv, slm);
-    vec_int4 svi = vec_int4_z;
-    svi = vec_sums(vec_sum4s(sv, svi), svi);
-    return vec_extract(svi, 3);
+    static const vec_uchar16 qperm = {112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128};
+    return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2);
 }
 inline int v_signmask(const v_uint16x8& a)
 { return v_signmask(v_reinterpret_as_s16(a)); }
 
 inline int v_signmask(const v_int32x4& a)
 {
-    static const vec_uint4 slm = {0, 1, 2, 3};
-    vec_int4 sv = vec_sr(a.val, vec_uint4_sp(31));
-    sv = vec_sl(sv, slm);
-    sv = vec_sums(sv, vec_int4_z);
-    return vec_extract(sv, 3);
+    static const vec_uchar16 qperm = {96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128};
+    return vec_extract((vec_int4)vec_vbpermq(v_reinterpret_as_u8(a).val, qperm), 2);
 }
 inline int v_signmask(const v_uint32x4& a)
 { return v_signmask(v_reinterpret_as_s32(a)); }
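vec_vbpermq gathers arbitrary bits of its source by bit index, so each qperm table above simply lists the positions of the lane sign bits (index 128 selects a zero bit), ordered so that lane 0 lands in result bit 0. The contract being implemented, in scalar form:

    static inline int signmask_u8_ref(const unsigned char v[16])
    {
        int mask = 0;
        for (int i = 0; i < 16; ++i)
            mask |= ((v[i] >> 7) & 1) << i;  // bit i = sign of lane i
        return mask;
    }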
@@ -845,15 +990,28 @@ inline int v_signmask(const v_uint64x2& a)
 inline int v_signmask(const v_float64x2& a)
 { return v_signmask(v_reinterpret_as_s64(a)); }
 
+inline int v_scan_forward(const v_int8x16& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint8x16& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_int16x8& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint16x8& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_int32x4& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint32x4& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_float32x4& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_int64x2& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_uint64x2& a) { return trailingZeros32(v_signmask(a)); }
+inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signmask(a)); }
+
 template<typename _Tpvec>
 inline bool v_check_all(const _Tpvec& a)
-{ return vec_all_lt(a.val, _Tpvec().val); }
+{ return vec_all_lt(a.val, _Tpvec::zero().val); }
 inline bool v_check_all(const v_uint8x16& a)
 { return v_check_all(v_reinterpret_as_s8(a)); }
 inline bool v_check_all(const v_uint16x8& a)
 { return v_check_all(v_reinterpret_as_s16(a)); }
 inline bool v_check_all(const v_uint32x4& a)
 { return v_check_all(v_reinterpret_as_s32(a)); }
+inline bool v_check_all(const v_uint64x2& a)
+{ return v_check_all(v_reinterpret_as_s64(a)); }
 inline bool v_check_all(const v_float32x4& a)
 { return v_check_all(v_reinterpret_as_s32(a)); }
 inline bool v_check_all(const v_float64x2& a)

@@ -861,13 +1019,15 @@ inline bool v_check_all(const v_float64x2& a)
 
 template<typename _Tpvec>
 inline bool v_check_any(const _Tpvec& a)
-{ return vec_any_lt(a.val, _Tpvec().val); }
+{ return vec_any_lt(a.val, _Tpvec::zero().val); }
 inline bool v_check_any(const v_uint8x16& a)
 { return v_check_any(v_reinterpret_as_s8(a)); }
 inline bool v_check_any(const v_uint16x8& a)
 { return v_check_any(v_reinterpret_as_s16(a)); }
 inline bool v_check_any(const v_uint32x4& a)
 { return v_check_any(v_reinterpret_as_s32(a)); }
+inline bool v_check_any(const v_uint64x2& a)
+{ return v_check_any(v_reinterpret_as_s64(a)); }
 inline bool v_check_any(const v_float32x4& a)
 { return v_check_any(v_reinterpret_as_s32(a)); }
 inline bool v_check_any(const v_float64x2& a)

@@ -994,6 +1154,9 @@ inline v_float64x2 v_cvt_f64(const v_float32x4& a)
 inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
 { return v_float64x2(vec_cvfo(vec_mergel(a.val, a.val))); }
 
+inline v_float64x2 v_cvt_f64(const v_int64x2& a)
+{ return v_float64x2(vec_ctd(a.val)); }
+
 ////////////// Lookup table access ////////////////////
 
 inline v_int8x16 v_lut(const schar* tab, const int* idx)

@@ -1205,7 +1368,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
     return v_float32x4(vec_extract_fp_from_shorth(vf16));
 #elif CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
     vec_float4 vf32;
-    __asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wf" (vf32) : "wa" (vec_mergeh(vf16, vf16)));
+    __asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wa" (vf32) : "wa" (vec_mergeh(vf16, vf16)));
     return v_float32x4(vf32);
 #else
     const vec_int4 z = vec_int4_z, delta = vec_int4_sp(0x38000000);

@@ -1227,10 +1390,10 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
 
 inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
 {
-    // fixme: Is there any buitin op or intrinsic that cover "xvcvsphp"?
+    // fixme: Is there any builtin op or intrinsic that cover "xvcvsphp"?
 #if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM)
     vec_ushort8 vf16;
-    __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wf" (v.val));
+    __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (v.val));
     vec_st_l8(vec_mergesqe(vf16, vf16), ptr);
 #else
     const vec_int4 signmask = vec_int4_sp(0x80000000);

@@ -1264,12 +1427,134 @@ inline void v_cleanup() {}
 
 ////////// Matrix operations /////////
 
+//////// Dot Product ////////
+// 16 >> 32
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
+{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)); }
+
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{ return v_int32x4(vec_msum(a.val, b.val, c.val)); }
+
+// 32 >> 64
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b)
+{
+    vec_dword2 even = vec_mule(a.val, b.val);
+    vec_dword2 odd = vec_mulo(a.val, b.val);
+    return v_int64x2(vec_add(even, odd));
+}
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{ return v_dotprod(a, b) + c; }
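For the 32 >> 64 form, vec_mule/vec_mulo produce the even-lane and odd-lane products as doublewords, so adding them yields the pairwise dot product. Reference semantics in scalar form:

    #include <cstdint>
    static inline void dotprod_s32_ref(const int32_t a[4], const int32_t b[4], int64_t out[2])
    {
        out[0] = (int64_t)a[0] * b[0] + (int64_t)a[1] * b[1];
        out[1] = (int64_t)a[2] * b[2] + (int64_t)a[3] * b[3];
    }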
+// 8 >> 32
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
+{ return v_uint32x4(vec_msum(a.val, b.val, c.val)); }
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b)
+{ return v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)); }
+
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b)
+{
+    const vec_ushort8 eight = vec_ushort8_sp(8);
+    vec_short8 a0 = vec_sra((vec_short8)vec_sld(a.val, a.val, 1), eight); // even
+    vec_short8 a1 = vec_sra((vec_short8)a.val, eight); // odd
+    vec_short8 b0 = vec_sra((vec_short8)vec_sld(b.val, b.val, 1), eight);
+    vec_short8 b1 = vec_sra((vec_short8)b.val, eight);
+    return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, vec_int4_z)));
+}
+
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
+{
+    const vec_ushort8 eight = vec_ushort8_sp(8);
+    vec_short8 a0 = vec_sra((vec_short8)vec_sld(a.val, a.val, 1), eight); // even
+    vec_short8 a1 = vec_sra((vec_short8)a.val, eight); // odd
+    vec_short8 b0 = vec_sra((vec_short8)vec_sld(b.val, b.val, 1), eight);
+    vec_short8 b1 = vec_sra((vec_short8)b.val, eight);
+    return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, c.val)));
+}
+
+// 16 >> 64
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b)
+{
+    const vec_uint4 zero = vec_uint4_z;
+    vec_uint4 even = vec_mule(a.val, b.val);
+    vec_uint4 odd = vec_mulo(a.val, b.val);
+    vec_udword2 e0 = (vec_udword2)vec_mergee(even, zero);
+    vec_udword2 e1 = (vec_udword2)vec_mergeo(even, zero);
+    vec_udword2 o0 = (vec_udword2)vec_mergee(odd, zero);
+    vec_udword2 o1 = (vec_udword2)vec_mergeo(odd, zero);
+    vec_udword2 s0 = vec_add(e0, o0);
+    vec_udword2 s1 = vec_add(e1, o1);
+    return v_uint64x2(vec_add(s0, s1));
+}
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
+{ return v_dotprod_expand(a, b) + c; }
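The merge/add cascade above implements the universal-intrinsics contract for the expanding dot product: each 64-bit output lane accumulates four adjacent products. A scalar reference of that contract:

    #include <cstdint>
    static inline void dotprod_expand_u16_ref(const uint16_t a[8], const uint16_t b[8], uint64_t out[2])
    {
        for (int k = 0; k < 2; ++k)
        {
            out[k] = 0;
            for (int i = 0; i < 4; ++i)
                out[k] += (uint64_t)a[4*k + i] * b[4*k + i];
        }
    }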
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b)
+{
+    v_int32x4 prod = v_dotprod(a, b);
+    v_int64x2 c, d;
+    v_expand(prod, c, d);
+    return v_int64x2(vec_add(vec_mergeh(c.val, d.val), vec_mergel(c.val, d.val)));
+}
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
+{ return v_dotprod_expand(a, b) + c; }
+
+// 32 >> 64f
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b)
+{ return v_cvt_f64(v_dotprod(a, b)); }
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
+{ return v_dotprod_expand(a, b) + c; }
+
+//////// Fast Dot Product ////////
+
+// 16 >> 32
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b)
+{ return v_dotprod(a, b); }
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)) + c; }
+// 32 >> 64
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b)
+{ return v_dotprod(a, b); }
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{ return v_dotprod(a, b, c); }
+
+// 8 >> 32
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b)
+{ return v_dotprod_expand(a, b); }
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
+{ return v_uint32x4(vec_msum(a.val, b.val, vec_uint4_z)) + c; }
+
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b)
+{
+    vec_short8 a0 = vec_unpackh(a.val);
+    vec_short8 a1 = vec_unpackl(a.val);
+    vec_short8 b0 = vec_unpackh(b.val);
+    vec_short8 b1 = vec_unpackl(b.val);
+    return v_int32x4(vec_msum(a0, b0, vec_msum(a1, b1, vec_int4_z)));
+}
+inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c)
+{ return v_dotprod_expand_fast(a, b) + c; }
+
+// 16 >> 64
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b)
+{ return v_dotprod_expand(a, b); }
+inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
+{ return v_dotprod_expand(a, b, c); }
+
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b)
+{
+    v_int32x4 prod = v_dotprod(a, b);
+    v_int64x2 c, d;
+    v_expand(prod, c, d);
+    return c + d;
+}
+inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c)
+{ return v_dotprod_expand_fast(a, b) + c; }
+
+// 32 >> 64f
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b)
+{ return v_dotprod_expand(a, b); }
+inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c)
+{ return v_dotprod_expand(a, b, c); }
+
 inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
                             const v_float32x4& m1, const v_float32x4& m2,
                             const v_float32x4& m3)

@@ -1309,15 +1594,10 @@ OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_uint32x4, vec_uint4)
 OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_int32x4, vec_int4)
 OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_float32x4, vec_float4)
 
-//! @name Check SIMD support
-//! @{
-//! @brief Check CPU capability of SIMD operation
-static inline bool hasSIMD128()
-{
-    return (CV_CPU_HAS_SUPPORT_VSX) ? true : false;
-}
+template<int i, typename Tvec>
+inline Tvec v_broadcast_element(const Tvec& v)
+{ return Tvec(vec_splat(v.val, i)); }
 
 //! @}
 
 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,146 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This header is not standalone. Don't include directly, use "intrin.hpp" instead.
+#ifdef OPENCV_HAL_INTRIN_HPP  // defined in intrin.hpp
+
+
+#if CV_SIMD128 || CV_SIMD128_CPP
+
+template<typename _T> struct Type2Vec128_Traits;
+#define CV_INTRIN_DEF_TYPE2VEC128_TRAITS(type_, vec_type_) \
+template<> struct Type2Vec128_Traits<type_> \
+{ \
+    typedef vec_type_ vec_type; \
+}
+
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uchar, v_uint8x16);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(schar, v_int8x16);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(ushort, v_uint16x8);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(short, v_int16x8);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(unsigned, v_uint32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int, v_int32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(float, v_float32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uint64, v_uint64x2);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int64, v_int64x2);
+#if CV_SIMD128_64F
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(double, v_float64x2);
+#endif
+
+template<typename _T> static inline
+typename Type2Vec128_Traits<_T>::vec_type v_setall(const _T& a);
+
+template<> inline Type2Vec128_Traits< uchar>::vec_type v_setall< uchar>(const uchar& a)  { return v_setall_u8(a); }
+template<> inline Type2Vec128_Traits< schar>::vec_type v_setall< schar>(const schar& a)  { return v_setall_s8(a); }
+template<> inline Type2Vec128_Traits<ushort>::vec_type v_setall<ushort>(const ushort& a) { return v_setall_u16(a); }
+template<> inline Type2Vec128_Traits< short>::vec_type v_setall< short>(const short& a)  { return v_setall_s16(a); }
+template<> inline Type2Vec128_Traits<  uint>::vec_type v_setall<  uint>(const uint& a)   { return v_setall_u32(a); }
+template<> inline Type2Vec128_Traits<   int>::vec_type v_setall<   int>(const int& a)    { return v_setall_s32(a); }
+template<> inline Type2Vec128_Traits<uint64>::vec_type v_setall<uint64>(const uint64& a) { return v_setall_u64(a); }
+template<> inline Type2Vec128_Traits< int64>::vec_type v_setall< int64>(const int64& a)  { return v_setall_s64(a); }
+template<> inline Type2Vec128_Traits< float>::vec_type v_setall< float>(const float& a)  { return v_setall_f32(a); }
+#if CV_SIMD128_64F
+template<> inline Type2Vec128_Traits<double>::vec_type v_setall<double>(const double& a) { return v_setall_f64(a); }
+#endif
+
+#endif  // SIMD128
+
+
+#if CV_SIMD256
+
+template<typename _T> struct Type2Vec256_Traits;
+#define CV_INTRIN_DEF_TYPE2VEC256_TRAITS(type_, vec_type_) \
+template<> struct Type2Vec256_Traits<type_> \
+{ \
+    typedef vec_type_ vec_type; \
+}
+
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uchar, v_uint8x32);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(schar, v_int8x32);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(ushort, v_uint16x16);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(short, v_int16x16);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(unsigned, v_uint32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int, v_int32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(float, v_float32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uint64, v_uint64x4);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int64, v_int64x4);
+#if CV_SIMD256_64F
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(double, v_float64x4);
+#endif
+
+template<typename _T> static inline
+typename Type2Vec256_Traits<_T>::vec_type v256_setall(const _T& a);
+
+template<> inline Type2Vec256_Traits< uchar>::vec_type v256_setall< uchar>(const uchar& a)  { return v256_setall_u8(a); }
+template<> inline Type2Vec256_Traits< schar>::vec_type v256_setall< schar>(const schar& a)  { return v256_setall_s8(a); }
+template<> inline Type2Vec256_Traits<ushort>::vec_type v256_setall<ushort>(const ushort& a) { return v256_setall_u16(a); }
+template<> inline Type2Vec256_Traits< short>::vec_type v256_setall< short>(const short& a)  { return v256_setall_s16(a); }
+template<> inline Type2Vec256_Traits<  uint>::vec_type v256_setall<  uint>(const uint& a)   { return v256_setall_u32(a); }
+template<> inline Type2Vec256_Traits<   int>::vec_type v256_setall<   int>(const int& a)    { return v256_setall_s32(a); }
+template<> inline Type2Vec256_Traits<uint64>::vec_type v256_setall<uint64>(const uint64& a) { return v256_setall_u64(a); }
+template<> inline Type2Vec256_Traits< int64>::vec_type v256_setall< int64>(const int64& a)  { return v256_setall_s64(a); }
+template<> inline Type2Vec256_Traits< float>::vec_type v256_setall< float>(const float& a)  { return v256_setall_f32(a); }
+#if CV_SIMD256_64F
+template<> inline Type2Vec256_Traits<double>::vec_type v256_setall<double>(const double& a) { return v256_setall_f64(a); }
+#endif
+
+#endif  // SIMD256
+
+
+#if CV_SIMD512
+
+template<typename _T> struct Type2Vec512_Traits;
+#define CV_INTRIN_DEF_TYPE2VEC512_TRAITS(type_, vec_type_) \
+template<> struct Type2Vec512_Traits<type_> \
+{ \
+    typedef vec_type_ vec_type; \
+}
+
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uchar, v_uint8x64);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(schar, v_int8x64);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(ushort, v_uint16x32);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(short, v_int16x32);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(unsigned, v_uint32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int, v_int32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(float, v_float32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uint64, v_uint64x8);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int64, v_int64x8);
+#if CV_SIMD512_64F
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(double, v_float64x8);
+#endif
+
+template<typename _T> static inline
+typename Type2Vec512_Traits<_T>::vec_type v512_setall(const _T& a);
+
+template<> inline Type2Vec512_Traits< uchar>::vec_type v512_setall< uchar>(const uchar& a)  { return v512_setall_u8(a); }
+template<> inline Type2Vec512_Traits< schar>::vec_type v512_setall< schar>(const schar& a)  { return v512_setall_s8(a); }
+template<> inline Type2Vec512_Traits<ushort>::vec_type v512_setall<ushort>(const ushort& a) { return v512_setall_u16(a); }
+template<> inline Type2Vec512_Traits< short>::vec_type v512_setall< short>(const short& a)  { return v512_setall_s16(a); }
+template<> inline Type2Vec512_Traits<  uint>::vec_type v512_setall<  uint>(const uint& a)   { return v512_setall_u32(a); }
+template<> inline Type2Vec512_Traits<   int>::vec_type v512_setall<   int>(const int& a)    { return v512_setall_s32(a); }
+template<> inline Type2Vec512_Traits<uint64>::vec_type v512_setall<uint64>(const uint64& a) { return v512_setall_u64(a); }
+template<> inline Type2Vec512_Traits< int64>::vec_type v512_setall< int64>(const int64& a)  { return v512_setall_s64(a); }
+template<> inline Type2Vec512_Traits< float>::vec_type v512_setall< float>(const float& a)  { return v512_setall_f32(a); }
+#if CV_SIMD512_64F
+template<> inline Type2Vec512_Traits<double>::vec_type v512_setall<double>(const double& a) { return v512_setall_f64(a); }
+#endif
+
+#endif  // SIMD512
+
+
+#if CV_SIMD_WIDTH == 16
+template<typename _T> static inline
+typename Type2Vec128_Traits<_T>::vec_type vx_setall(const _T& a) { return v_setall(a); }
+#elif CV_SIMD_WIDTH == 32
+template<typename _T> static inline
+typename Type2Vec256_Traits<_T>::vec_type vx_setall(const _T& a) { return v256_setall(a); }
+#elif CV_SIMD_WIDTH == 64
+template<typename _T> static inline
+typename Type2Vec512_Traits<_T>::vec_type vx_setall(const _T& a) { return v512_setall(a); }
+#else
+#error "Build configuration error, unsupported CV_SIMD_WIDTH"
+#endif
+
+
+#endif  // OPENCV_HAL_INTRIN_HPP
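With these traits in place, callers pick the vector type from the scalar type and let CV_SIMD_WIDTH choose the register width; possible usage (assuming a SIMD-enabled build):

    // v_uint8x16 v8 = v_setall((uchar)1);   // fixed 128-bit form
    // auto vf = vx_setall(0.5f);            // width follows CV_SIMD_WIDTH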
@ -151,7 +151,7 @@ number of components (vectors/matrices) of the outer vector.
|
|||
|
||||
In general, type support is limited to cv::Mat types. Other types are forbidden.
|
||||
But in some cases we need to support passing of custom non-general Mat types, like arrays of cv::KeyPoint, cv::DMatch, etc.
|
||||
This data is not intented to be interpreted as an image data, or processed somehow like regular cv::Mat.
|
||||
This data is not intended to be interpreted as an image data, or processed somehow like regular cv::Mat.
|
||||
To pass such custom type use rawIn() / rawOut() / rawInOut() wrappers.
|
||||
Custom type is wrapped as Mat-compatible `CV_8UC<N>` values (N = sizeof(T), N <= CV_CN_MAX).
|
||||
*/
|
||||
@@ -170,7 +170,9 @@ public:
STD_VECTOR = 3 << KIND_SHIFT,
STD_VECTOR_VECTOR = 4 << KIND_SHIFT,
STD_VECTOR_MAT = 5 << KIND_SHIFT,
EXPR = 6 << KIND_SHIFT,
#if OPENCV_ABI_COMPATIBILITY < 500
EXPR = 6 << KIND_SHIFT, //!< removed: https://github.com/opencv/opencv/pull/17046
#endif
OPENGL_BUFFER = 7 << KIND_SHIFT,
CUDA_HOST_MEM = 8 << KIND_SHIFT,
CUDA_GPU_MAT = 9 << KIND_SHIFT,

@@ -178,7 +180,9 @@ public:
STD_VECTOR_UMAT =11 << KIND_SHIFT,
STD_BOOL_VECTOR =12 << KIND_SHIFT,
STD_VECTOR_CUDA_GPU_MAT = 13 << KIND_SHIFT,
STD_ARRAY =14 << KIND_SHIFT,
#if OPENCV_ABI_COMPATIBILITY < 500
STD_ARRAY =14 << KIND_SHIFT, //!< removed: https://github.com/opencv/opencv/issues/18897
#endif
STD_ARRAY_MAT =15 << KIND_SHIFT
};

@@ -377,6 +381,9 @@ public:
void assign(const std::vector<UMat>& v) const;
void assign(const std::vector<Mat>& v) const;

void move(UMat& u) const;
void move(Mat& m) const;
};

@@ -576,24 +583,24 @@ struct CV_EXPORTS UMatData
struct CV_EXPORTS MatSize
{
explicit MatSize(int* _p);
int dims() const;
explicit MatSize(int* _p) CV_NOEXCEPT;
int dims() const CV_NOEXCEPT;
Size operator()() const;
const int& operator[](int i) const;
int& operator[](int i);
operator const int*() const; // TODO OpenCV 4.0: drop this
bool operator == (const MatSize& sz) const;
bool operator != (const MatSize& sz) const;
operator const int*() const CV_NOEXCEPT; // TODO OpenCV 4.0: drop this
bool operator == (const MatSize& sz) const CV_NOEXCEPT;
bool operator != (const MatSize& sz) const CV_NOEXCEPT;

int* p;
};

struct CV_EXPORTS MatStep
{
MatStep();
explicit MatStep(size_t s);
const size_t& operator[](int i) const;
size_t& operator[](int i);
MatStep() CV_NOEXCEPT;
explicit MatStep(size_t s) CV_NOEXCEPT;
const size_t& operator[](int i) const CV_NOEXCEPT;
size_t& operator[](int i) CV_NOEXCEPT;
operator size_t() const;
MatStep& operator = (size_t s);

@@ -699,11 +706,16 @@ sub-matrices.
-# Process "foreign" data using OpenCV (for example, when you implement a DirectShow\* filter or
a processing module for gstreamer, and so on). For example:
@code
void process_video_frame(const unsigned char* pixels,
int width, int height, int step)
Mat process_video_frame(const unsigned char* pixels,
int width, int height, int step)
{
Mat img(height, width, CV_8UC3, pixels, step);
GaussianBlur(img, img, Size(7,7), 1.5, 1.5);
// wrap input buffer
Mat img(height, width, CV_8UC3, (unsigned char*)pixels, step);

Mat result;
GaussianBlur(img, result, Size(7, 7), 1.5, 1.5);

return result;
}
@endcode
-# Quickly initialize small matrices and/or get a super-fast element access.

@@ -807,7 +819,7 @@ public:
The constructed matrix can further be assigned to another matrix or matrix expression or can be
allocated with Mat::create . In the former case, the old content is de-referenced.
*/
Mat();
Mat() CV_NOEXCEPT;

/** @overload
@param rows Number of rows in a 2D array.

@@ -2208,7 +2220,7 @@ public:
typedef MatConstIterator_<_Tp> const_iterator;

//! default constructor
Mat_();
Mat_() CV_NOEXCEPT;
//! equivalent to Mat(_rows, _cols, DataType<_Tp>::type)
Mat_(int _rows, int _cols);
//! constructor that sets each matrix element to specified value

@@ -2408,12 +2420,12 @@ class CV_EXPORTS UMat
{
public:
//! default constructor
UMat(UMatUsageFlags usageFlags = USAGE_DEFAULT);
UMat(UMatUsageFlags usageFlags = USAGE_DEFAULT) CV_NOEXCEPT;
//! constructs 2D matrix of the specified size and type
// (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
UMat(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
UMat(Size size, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT);
//! constucts 2D matrix and fills it with the specified value _s.
//! constructs 2D matrix and fills it with the specified value _s.
UMat(int rows, int cols, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT);
UMat(Size size, int type, const Scalar& s, UMatUsageFlags usageFlags = USAGE_DEFAULT);

@@ -2429,20 +2441,11 @@ public:
UMat(const UMat& m, const Rect& roi);
UMat(const UMat& m, const Range* ranges);
UMat(const UMat& m, const std::vector<Range>& ranges);

// FIXIT copyData=false is not implemented, drop this in favor of cv::Mat (OpenCV 5.0)
//! builds matrix from std::vector with or without copying the data
template<typename _Tp> explicit UMat(const std::vector<_Tp>& vec, bool copyData=false);

//! builds matrix from cv::Vec; the data is copied by default
template<typename _Tp, int n> explicit UMat(const Vec<_Tp, n>& vec, bool copyData=true);
//! builds matrix from cv::Matx; the data is copied by default
template<typename _Tp, int m, int n> explicit UMat(const Matx<_Tp, m, n>& mtx, bool copyData=true);
//! builds matrix from a 2D point
template<typename _Tp> explicit UMat(const Point_<_Tp>& pt, bool copyData=true);
//! builds matrix from a 3D point
template<typename _Tp> explicit UMat(const Point3_<_Tp>& pt, bool copyData=true);
//! builds matrix from comma initializer
template<typename _Tp> explicit UMat(const MatCommaInitializer_<_Tp>& commaInitializer);

//! destructor - calls release()
~UMat();
//! assignment operators
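A hedged sketch for the std::vector constructor flagged by the FIXIT above: the copyData=false path is not implemented, so only an explicit copy works (names are illustrative):
@code
#include <opencv2/core.hpp>
#include <vector>

void vector_to_umat()
{
    std::vector<float> samples = {1.f, 2.f, 3.f};
    // copyData must be true; the copyData=false path is unimplemented (see FIXIT above).
    cv::UMat u(samples, true);   // 3x1 CV_32F, data copied into UMat storage
}
@endcode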
@@ -2860,7 +2863,7 @@ public:
`ref<_Tp>(i0,...[,hashval])` is equivalent to `*(_Tp*)ptr(i0,...,true[,hashval])`.
The methods always return a valid reference.
If the element did not exist, it is created and initialiazed with 0.
If the element did not exist, it is created and initialized with 0.
*/
//! returns reference to the specified element (1D case)
template<typename _Tp> _Tp& ref(int i0, size_t* hashval=0);
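A short sketch of the create-on-access semantics documented above, using the SparseMat API declared in this header:
@code
#include <opencv2/core.hpp>

void sparse_ref_demo()
{
    const int sizes[] = {10, 10};
    cv::SparseMat sm(2, sizes, CV_32F);

    sm.ref<float>(3, 4) += 1.f;            // missing element: created, zero-initialized, then set
    float hit  = sm.value<float>(3, 4);    // reads 1.f
    float miss = sm.value<float>(0, 0);    // reads as 0; value() never creates elements
    (void)hit; (void)miss;
}
@endcode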
@@ -3577,6 +3580,8 @@ public:
Mat cross(const Mat& m) const;
double dot(const Mat& m) const;

void swap(MatExpr& b);

const MatOp* op;
int flags;

@@ -54,6 +54,21 @@
#pragma warning( disable: 4127 )
#endif

#if defined(CV_SKIP_DISABLE_CLANG_ENUM_WARNINGS)
// nothing
#elif defined(CV_FORCE_DISABLE_CLANG_ENUM_WARNINGS)
#define CV_DISABLE_CLANG_ENUM_WARNINGS
#elif defined(__clang__) && defined(__has_warning)
#if __has_warning("-Wdeprecated-enum-enum-conversion") && __has_warning("-Wdeprecated-anon-enum-enum-conversion")
#define CV_DISABLE_CLANG_ENUM_WARNINGS
#endif
#endif
#ifdef CV_DISABLE_CLANG_ENUM_WARNINGS
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-enum-enum-conversion"
#pragma clang diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion"
#endif

namespace cv
{
CV__DEBUG_NS_BEGIN

@@ -97,7 +112,7 @@ _InputArray::_InputArray(const std::vector<_Tp>& vec)
#ifdef CV_CXX_STD_ARRAY
template<typename _Tp, std::size_t _Nm> inline
_InputArray::_InputArray(const std::array<_Tp, _Nm>& arr)
{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ, arr.data(), Size(1, _Nm)); }
{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ, arr.data(), Size(1, _Nm)); }

template<std::size_t _Nm> inline
_InputArray::_InputArray(const std::array<Mat, _Nm>& arr)

@@ -135,9 +150,6 @@ _InputArray::_InputArray(const Mat_<_Tp>& m)
inline _InputArray::_InputArray(const double& val)
{ init(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F + ACCESS_READ, &val, Size(1,1)); }

inline _InputArray::_InputArray(const MatExpr& expr)
{ init(FIXED_TYPE + FIXED_SIZE + EXPR + ACCESS_READ, &expr); }

inline _InputArray::_InputArray(const cuda::GpuMat& d_mat)
{ init(CUDA_GPU_MAT + ACCESS_READ, &d_mat); }

@@ -164,7 +176,7 @@ template<typename _Tp, std::size_t _Nm> inline
_InputArray _InputArray::rawIn(const std::array<_Tp, _Nm>& arr)
{
_InputArray v;
v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ;
v.flags = FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ;
v.obj = (void*)arr.data();
v.sz = Size(1, _Nm);
return v;

@@ -187,7 +199,7 @@ inline bool _InputArray::isUMatVector() const { return kind() == _InputArray::S
inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; }
inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR ||
kind() == _InputArray::STD_BOOL_VECTOR ||
kind() == _InputArray::STD_ARRAY; }
(kind() == _InputArray::MATX && (sz.width <= 1 || sz.height <= 1)); }
inline bool _InputArray::isGpuMat() const { return kind() == _InputArray::CUDA_GPU_MAT; }
inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT; }

@@ -207,7 +219,7 @@ _OutputArray::_OutputArray(std::vector<_Tp>& vec)
#ifdef CV_CXX_STD_ARRAY
template<typename _Tp, std::size_t _Nm> inline
_OutputArray::_OutputArray(std::array<_Tp, _Nm>& arr)
{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }

template<std::size_t _Nm> inline
_OutputArray::_OutputArray(std::array<Mat, _Nm>& arr)

@@ -249,7 +261,7 @@ _OutputArray::_OutputArray(const std::vector<_Tp>& vec)
#ifdef CV_CXX_STD_ARRAY
template<typename _Tp, std::size_t _Nm> inline
_OutputArray::_OutputArray(const std::array<_Tp, _Nm>& arr)
{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }
{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); }

template<std::size_t _Nm> inline
_OutputArray::_OutputArray(const std::array<Mat, _Nm>& arr)

@@ -324,7 +336,7 @@ template<typename _Tp, std::size_t _Nm> inline
_OutputArray _OutputArray::rawOut(std::array<_Tp, _Nm>& arr)
{
_OutputArray v;
v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE;
v.flags = FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE;
v.obj = (void*)arr.data();
v.sz = Size(1, _Nm);
return v;

@@ -347,7 +359,7 @@ _InputOutputArray::_InputOutputArray(std::vector<_Tp>& vec)
#ifdef CV_CXX_STD_ARRAY
template<typename _Tp, std::size_t _Nm> inline
_InputOutputArray::_InputOutputArray(std::array<_Tp, _Nm>& arr)
{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); }
{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); }

template<std::size_t _Nm> inline
_InputOutputArray::_InputOutputArray(std::array<Mat, _Nm>& arr)

@@ -384,7 +396,7 @@ _InputOutputArray::_InputOutputArray(const std::vector<_Tp>& vec)
#ifdef CV_CXX_STD_ARRAY
template<typename _Tp, std::size_t _Nm> inline
_InputOutputArray::_InputOutputArray(const std::array<_Tp, _Nm>& arr)
{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); }
{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); }

template<std::size_t _Nm> inline
_InputOutputArray::_InputOutputArray(const std::array<Mat, _Nm>& arr)

@@ -461,7 +473,7 @@ template<typename _Tp, std::size_t _Nm> inline
_InputOutputArray _InputOutputArray::rawInOut(std::array<_Tp, _Nm>& arr)
{
_InputOutputArray v;
v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW;
v.flags = FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW;
v.obj = (void*)arr.data();
v.sz = Size(1, _Nm);
return v;

@@ -477,158 +489,6 @@ CV__DEBUG_NS_END

//////////////////////////////////////////// Mat //////////////////////////////////////////

inline
Mat::Mat()
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
datalimit(0), allocator(0), u(0), size(&rows), step(0)
{}

inline
Mat::Mat(int _rows, int _cols, int _type)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
datalimit(0), allocator(0), u(0), size(&rows), step(0)
{
create(_rows, _cols, _type);
}

inline
Mat::Mat(int _rows, int _cols, int _type, const Scalar& _s)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
datalimit(0), allocator(0), u(0), size(&rows), step(0)
{
create(_rows, _cols, _type);
*this = _s;
}

inline
Mat::Mat(Size _sz, int _type)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
datalimit(0), allocator(0), u(0), size(&rows), step(0)
{
create( _sz.height, _sz.width, _type );
}

inline
Mat::Mat(Size _sz, int _type, const Scalar& _s)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
datalimit(0), allocator(0), u(0), size(&rows), step(0)
{
create(_sz.height, _sz.width, _type);
*this = _s;
}

inline
Mat::Mat(int _dims, const int* _sz, int _type)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
datalimit(0), allocator(0), u(0), size(&rows), step(0)
{
create(_dims, _sz, _type);
}

inline
Mat::Mat(int _dims, const int* _sz, int _type, const Scalar& _s)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
datalimit(0), allocator(0), u(0), size(&rows), step(0)
{
create(_dims, _sz, _type);
*this = _s;
}

inline
Mat::Mat(const std::vector<int>& _sz, int _type)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
datalimit(0), allocator(0), u(0), size(&rows), step(0)
{
create(_sz, _type);
}

inline
Mat::Mat(const std::vector<int>& _sz, int _type, const Scalar& _s)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
datalimit(0), allocator(0), u(0), size(&rows), step(0)
{
create(_sz, _type);
*this = _s;
}

inline
Mat::Mat(const Mat& m)
: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data),
datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator),
u(m.u), size(&rows), step(0)
{
if( u )
CV_XADD(&u->refcount, 1);
if( m.dims <= 2 )
{
step[0] = m.step[0]; step[1] = m.step[1];
}
else
{
dims = 0;
copySize(m);
}
}

inline
Mat::Mat(int _rows, int _cols, int _type, void* _data, size_t _step)
: flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_rows), cols(_cols),
data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0),
allocator(0), u(0), size(&rows)
{
CV_Assert(total() == 0 || data != NULL);

size_t esz = CV_ELEM_SIZE(_type), esz1 = CV_ELEM_SIZE1(_type);
size_t minstep = cols * esz;
if( _step == AUTO_STEP )
{
_step = minstep;
}
else
{
CV_DbgAssert( _step >= minstep );
if (_step % esz1 != 0)
{
CV_Error(Error::BadStep, "Step must be a multiple of esz1");
}
}
step[0] = _step;
step[1] = esz;
datalimit = datastart + _step * rows;
dataend = datalimit - _step + minstep;
updateContinuityFlag();
}

inline
Mat::Mat(Size _sz, int _type, void* _data, size_t _step)
: flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_sz.height), cols(_sz.width),
data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0),
allocator(0), u(0), size(&rows)
{
CV_Assert(total() == 0 || data != NULL);

size_t esz = CV_ELEM_SIZE(_type), esz1 = CV_ELEM_SIZE1(_type);
size_t minstep = cols*esz;
if( _step == AUTO_STEP )
{
_step = minstep;
}
else
{
CV_DbgAssert( _step >= minstep );

if (_step % esz1 != 0)
{
CV_Error(Error::BadStep, "Step must be a multiple of esz1");
}
}
step[0] = _step;
step[1] = esz;
datalimit = datastart + _step*rows;
dataend = datalimit - _step + minstep;
updateContinuityFlag();
}

template<typename _Tp> inline
Mat::Mat(const std::vector<_Tp>& vec, bool copyData)
: flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
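The wrap-external-buffer constructor above validates that a caller-supplied step is at least cols * elemSize() and a multiple of the single-channel element size. A hedged usage sketch (the buffer dimensions are illustrative):
@code
#include <opencv2/core.hpp>
#include <vector>

void wrap_external_buffer()
{
    const int width = 640, height = 480;
    const size_t stride = 2048;                     // bytes per row; >= width * 3 for CV_8UC3
    std::vector<unsigned char> frame(stride * height);

    // No data is copied: img is a view into frame, which must outlive it.
    cv::Mat img(height, width, CV_8UC3, frame.data(), stride);
}
@endcode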
@@ -766,43 +626,6 @@ Mat::Mat(const MatCommaInitializer_<_Tp>& commaInitializer)
*this = commaInitializer.operator Mat_<_Tp>();
}

inline
Mat::~Mat()
{
release();
if( step.p != step.buf )
fastFree(step.p);
}

inline
Mat& Mat::operator = (const Mat& m)
{
if( this != &m )
{
if( m.u )
CV_XADD(&m.u->refcount, 1);
release();
flags = m.flags;
if( dims <= 2 && m.dims <= 2 )
{
dims = m.dims;
rows = m.rows;
cols = m.cols;
step[0] = m.step[0];
step[1] = m.step[1];
}
else
copySize(m);
data = m.data;
datastart = m.datastart;
dataend = m.dataend;
datalimit = m.datalimit;
allocator = m.allocator;
u = m.u;
}
return *this;
}

inline
Mat Mat::row(int y) const
{

@@ -839,67 +662,6 @@ Mat Mat::colRange(const Range& r) const
return Mat(*this, Range::all(), r);
}

inline
Mat Mat::clone() const
{
Mat m;
copyTo(m);
return m;
}

inline
void Mat::assignTo( Mat& m, int _type ) const
{
if( _type < 0 )
m = *this;
else
convertTo(m, _type);
}

inline
void Mat::create(int _rows, int _cols, int _type)
{
_type &= TYPE_MASK;
if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && data )
return;
int sz[] = {_rows, _cols};
create(2, sz, _type);
}

inline
void Mat::create(Size _sz, int _type)
{
create(_sz.height, _sz.width, _type);
}

inline
void Mat::addref()
{
if( u )
CV_XADD(&u->refcount, 1);
}

inline
void Mat::release()
{
if( u && CV_XADD(&u->refcount, -1) == 1 )
deallocate();
u = NULL;
datastart = dataend = datalimit = data = 0;
for(int i = 0; i < dims; i++)
size.p[i] = 0;
#ifdef _DEBUG
flags = MAGIC_VAL;
dims = rows = cols = 0;
if(step.p != step.buf)
{
fastFree(step.p);
step.p = step.buf;
size.p = &rows;
}
#endif
}

inline
Mat Mat::operator()( Range _rowRange, Range _colRange ) const
{

@@ -968,40 +730,6 @@ int Mat::channels() const
return CV_MAT_CN(flags);
}

inline
size_t Mat::step1(int i) const
{
return step.p[i] / elemSize1();
}

inline
bool Mat::empty() const
{
return data == 0 || total() == 0 || dims == 0;
}

inline
size_t Mat::total() const
{
if( dims <= 2 )
return (size_t)rows * cols;
size_t p = 1;
for( int i = 0; i < dims; i++ )
p *= size[i];
return p;
}

inline
size_t Mat::total(int startDim, int endDim) const
{
CV_Assert( 0 <= startDim && startDim <= endDim);
size_t p = 1;
int endDim_ = endDim <= dims ? endDim : dims;
for( int i = startDim; i < endDim_; i++ )
p *= size[i];
return p;
}

inline
uchar* Mat::ptr(int y)
{
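A small sketch of the ranged total() overload above; the NCHW blob layout is an assumed example, not implied by the source:
@code
#include <opencv2/core.hpp>

// Elements per sample of an N x C x H x W blob: C * H * W.
size_t perSampleTotal(const cv::Mat& blob)
{
    return blob.total(1, 4);   // endDim is clamped to blob.dims, as in the body above
}
@endcode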
@@ -1289,6 +1017,8 @@ const _Tp& Mat::at(const Vec<int, n>& idx) const
template<typename _Tp> inline
MatConstIterator_<_Tp> Mat::begin() const
{
if (empty())
return MatConstIterator_<_Tp>();
CV_DbgAssert( elemSize() == sizeof(_Tp) );
return MatConstIterator_<_Tp>((const Mat_<_Tp>*)this);
}

@@ -1296,6 +1026,8 @@ MatConstIterator_<_Tp> Mat::begin() const
template<typename _Tp> inline
MatConstIterator_<_Tp> Mat::end() const
{
if (empty())
return MatConstIterator_<_Tp>();
CV_DbgAssert( elemSize() == sizeof(_Tp) );
MatConstIterator_<_Tp> it((const Mat_<_Tp>*)this);
it += total();

@@ -1305,6 +1037,8 @@ MatConstIterator_<_Tp> Mat::end() const
template<typename _Tp> inline
MatIterator_<_Tp> Mat::begin()
{
if (empty())
return MatIterator_<_Tp>();
CV_DbgAssert( elemSize() == sizeof(_Tp) );
return MatIterator_<_Tp>((Mat_<_Tp>*)this);
}

@@ -1312,6 +1046,8 @@ MatIterator_<_Tp> Mat::begin()
template<typename _Tp> inline
MatIterator_<_Tp> Mat::end()
{
if (empty())
return MatIterator_<_Tp>();
CV_DbgAssert( elemSize() == sizeof(_Tp) );
MatIterator_<_Tp> it((Mat_<_Tp>*)this);
it += total();
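With the empty() guards added above, begin() and end() on an empty matrix both return default-constructed iterators, so iteration degrades gracefully. A minimal sketch:
@code
#include <opencv2/core.hpp>

float sumElements(const cv::Mat_<float>& m)
{
    float s = 0.f;
    for (cv::MatConstIterator_<float> it = m.begin(); it != m.end(); ++it)
        s += *it;   // on an empty Mat, begin() == end(), so the loop is simply skipped
    return s;
}
@endcode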
@@ -1482,11 +1218,11 @@ Mat& Mat::operator = (Mat&& m)
///////////////////////////// MatSize ////////////////////////////

inline
MatSize::MatSize(int* _p)
MatSize::MatSize(int* _p) CV_NOEXCEPT
: p(_p) {}

inline
int MatSize::dims() const
int MatSize::dims() const CV_NOEXCEPT
{
return (p - 1)[0];
}

@@ -1519,29 +1255,13 @@ int& MatSize::operator[](int i)
}

inline
MatSize::operator const int*() const
MatSize::operator const int*() const CV_NOEXCEPT
{
return p;
}

inline
bool MatSize::operator == (const MatSize& sz) const
{
int d = dims();
int dsz = sz.dims();
if( d != dsz )
return false;
if( d == 2 )
return p[0] == sz.p[0] && p[1] == sz.p[1];

for( int i = 0; i < d; i++ )
if( p[i] != sz.p[i] )
return false;
return true;
}

inline
bool MatSize::operator != (const MatSize& sz) const
bool MatSize::operator != (const MatSize& sz) const CV_NOEXCEPT
{
return !(*this == sz);
}

@@ -1551,25 +1271,25 @@ bool MatSize::operator != (const MatSize& sz) const
///////////////////////////// MatStep ////////////////////////////

inline
MatStep::MatStep()
MatStep::MatStep() CV_NOEXCEPT
{
p = buf; p[0] = p[1] = 0;
}

inline
MatStep::MatStep(size_t s)
MatStep::MatStep(size_t s) CV_NOEXCEPT
{
p = buf; p[0] = s; p[1] = 0;
}

inline
const size_t& MatStep::operator[](int i) const
const size_t& MatStep::operator[](int i) const CV_NOEXCEPT
{
return p[i];
}

inline
size_t& MatStep::operator[](int i)
size_t& MatStep::operator[](int i) CV_NOEXCEPT
{
return p[i];
}

@@ -1592,7 +1312,7 @@ inline MatStep& MatStep::operator = (size_t s)
////////////////////////////// Mat_<_Tp> ////////////////////////////

template<typename _Tp> inline
Mat_<_Tp>::Mat_()
Mat_<_Tp>::Mat_() CV_NOEXCEPT
: Mat()
{
flags = (flags & ~CV_MAT_TYPE_MASK) | traits::Type<_Tp>::value;

@@ -1744,6 +1464,11 @@ Mat_<_Tp>::Mat_(const std::array<_Tp, _Nm>& arr, bool copyData)
template<typename _Tp> inline
Mat_<_Tp>& Mat_<_Tp>::operator = (const Mat& m)
{
if (m.empty())
{
release();
return *this;
}
if( traits::Type<_Tp>::value == m.type() )
{
Mat::operator = (m);

@@ -1795,9 +1520,7 @@ template<typename _Tp> inline
void Mat_<_Tp>::release()
{
Mat::release();
#ifdef _DEBUG
flags = (flags & ~CV_MAT_TYPE_MASK) | traits::Type<_Tp>::value;
#endif
}

template<typename _Tp> inline

@@ -1809,7 +1532,7 @@ Mat_<_Tp> Mat_<_Tp>::cross(const Mat_& m) const
template<typename _Tp> template<typename T2> inline
Mat_<_Tp>::operator Mat_<T2>() const
{
return Mat_<T2>(*this);
return Mat_<T2>(static_cast<const Mat&>(*this));
}

template<typename _Tp> inline

@@ -2103,7 +1826,7 @@ void Mat_<_Tp>::forEach(const Functor& operation) const {

template<typename _Tp> inline
Mat_<_Tp>::Mat_(Mat_&& m)
: Mat(m)
: Mat(std::move(m))
{
}

@@ -2119,12 +1842,17 @@ Mat_<_Tp>::Mat_(Mat&& m)
: Mat()
{
flags = (flags & ~CV_MAT_TYPE_MASK) | traits::Type<_Tp>::value;
*this = m;
*this = std::move(m);
}

template<typename _Tp> inline
Mat_<_Tp>& Mat_<_Tp>::operator = (Mat&& m)
{
if (m.empty())
{
release();
return *this;
}
if( traits::Type<_Tp>::value == m.type() )
{
Mat::operator = ((Mat&&)m);

@@ -2152,51 +1880,6 @@ Mat_<_Tp>::Mat_(MatExpr&& e)

///////////////////////////// SparseMat /////////////////////////////

inline
SparseMat::SparseMat()
: flags(MAGIC_VAL), hdr(0)
{}

inline
SparseMat::SparseMat(int _dims, const int* _sizes, int _type)
: flags(MAGIC_VAL), hdr(0)
{
create(_dims, _sizes, _type);
}

inline
SparseMat::SparseMat(const SparseMat& m)
: flags(m.flags), hdr(m.hdr)
{
addref();
}

inline
SparseMat::~SparseMat()
{
release();
}

inline
SparseMat& SparseMat::operator = (const SparseMat& m)
{
if( this != &m )
{
if( m.hdr )
CV_XADD(&m.hdr->refcount, 1);
release();
flags = m.flags;
hdr = m.hdr;
}
return *this;
}

inline
SparseMat& SparseMat::operator = (const Mat& m)
{
return (*this = SparseMat(m));
}

inline
SparseMat SparseMat::clone() const
{
@@ -2205,30 +1888,6 @@ SparseMat SparseMat::clone() const
return temp;
}

inline
void SparseMat::assignTo( SparseMat& m, int _type ) const
{
if( _type < 0 )
m = *this;
else
convertTo(m, _type);
}

inline
void SparseMat::addref()
{
if( hdr )
CV_XADD(&hdr->refcount, 1);
}

inline
void SparseMat::release()
{
if( hdr && CV_XADD(&hdr->refcount, -1) == 1 )
delete hdr;
hdr = 0;
}

inline
size_t SparseMat::elemSize() const
{

@@ -2288,36 +1947,6 @@ size_t SparseMat::nzcount() const
return hdr ? hdr->nodeCount : 0;
}

inline
size_t SparseMat::hash(int i0) const
{
return (size_t)i0;
}

inline
size_t SparseMat::hash(int i0, int i1) const
{
return (size_t)(unsigned)i0 * HASH_SCALE + (unsigned)i1;
}

inline
size_t SparseMat::hash(int i0, int i1, int i2) const
{
return ((size_t)(unsigned)i0 * HASH_SCALE + (unsigned)i1) * HASH_SCALE + (unsigned)i2;
}

inline
size_t SparseMat::hash(const int* idx) const
{
size_t h = (unsigned)idx[0];
if( !hdr )
return 0;
int d = hdr->dims;
for(int i = 1; i < d; i++ )
h = h * HASH_SCALE + (unsigned)idx[i];
return h;
}

template<typename _Tp> inline
_Tp& SparseMat::ref(int i0, size_t* hashval)
{

@@ -2665,6 +2294,7 @@ MatConstIterator::MatConstIterator(const Mat* _m)
{
if( m && m->isContinuous() )
{
CV_Assert(!m->empty());
sliceStart = m->ptr();
sliceEnd = sliceStart + m->total()*elemSize;
}

@@ -2678,6 +2308,7 @@ MatConstIterator::MatConstIterator(const Mat* _m, int _row, int _col)
CV_Assert(m && m->dims <= 2);
if( m->isContinuous() )
{
CV_Assert(!m->empty());
sliceStart = m->ptr();
sliceEnd = sliceStart + m->total()*elemSize;
}

@@ -2692,6 +2323,7 @@ MatConstIterator::MatConstIterator(const Mat* _m, Point _pt)
CV_Assert(m && m->dims <= 2);
if( m->isContinuous() )
{
CV_Assert(!m->empty());
sliceStart = m->ptr();
sliceEnd = sliceStart + m->total()*elemSize;
}

@@ -3634,74 +3266,6 @@ const Mat_<_Tp>& operator /= (const Mat_<_Tp>& a, const MatExpr& b)

//////////////////////////////// UMat ////////////////////////////////

inline
UMat::UMat(UMatUsageFlags _usageFlags)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
{}

inline
UMat::UMat(int _rows, int _cols, int _type, UMatUsageFlags _usageFlags)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
{
create(_rows, _cols, _type);
}

inline
UMat::UMat(int _rows, int _cols, int _type, const Scalar& _s, UMatUsageFlags _usageFlags)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
{
create(_rows, _cols, _type);
*this = _s;
}

inline
UMat::UMat(Size _sz, int _type, UMatUsageFlags _usageFlags)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
{
create( _sz.height, _sz.width, _type );
}

inline
UMat::UMat(Size _sz, int _type, const Scalar& _s, UMatUsageFlags _usageFlags)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
{
create(_sz.height, _sz.width, _type);
*this = _s;
}

inline
UMat::UMat(int _dims, const int* _sz, int _type, UMatUsageFlags _usageFlags)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
{
create(_dims, _sz, _type);
}

inline
UMat::UMat(int _dims, const int* _sz, int _type, const Scalar& _s, UMatUsageFlags _usageFlags)
: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows)
{
create(_dims, _sz, _type);
*this = _s;
}

inline
UMat::UMat(const UMat& m)
: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator),
usageFlags(m.usageFlags), u(m.u), offset(m.offset), size(&rows)
{
addref();
if( m.dims <= 2 )
{
step[0] = m.step[0]; step[1] = m.step[1];
}
else
{
dims = 0;
copySize(m);
}
}

template<typename _Tp> inline
UMat::UMat(const std::vector<_Tp>& vec, bool copyData)
: flags(MAGIC_VAL | traits::Type<_Tp>::value | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),

@@ -3718,33 +3282,6 @@ cols(1), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
Mat((int)vec.size(), 1, traits::Type<_Tp>::value, (uchar*)&vec[0]).copyTo(*this);
}

inline
UMat& UMat::operator = (const UMat& m)
{
if( this != &m )
{
const_cast<UMat&>(m).addref();
release();
flags = m.flags;
if( dims <= 2 && m.dims <= 2 )
{
dims = m.dims;
rows = m.rows;
cols = m.cols;
step[0] = m.step[0];
step[1] = m.step[1];
}
else
copySize(m);
allocator = m.allocator;
if (usageFlags == USAGE_DEFAULT)
usageFlags = m.usageFlags;
u = m.u;
offset = m.offset;
}
return *this;
}

inline
UMat UMat::row(int y) const
{

@@ -3781,55 +3318,6 @@ UMat UMat::colRange(const Range& r) const
return UMat(*this, Range::all(), r);
}

inline
UMat UMat::clone() const
{
UMat m;
copyTo(m);
return m;
}

inline
void UMat::assignTo( UMat& m, int _type ) const
{
if( _type < 0 )
m = *this;
else
convertTo(m, _type);
}

inline
void UMat::create(int _rows, int _cols, int _type, UMatUsageFlags _usageFlags)
{
_type &= TYPE_MASK;
if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && u )
return;
int sz[] = {_rows, _cols};
create(2, sz, _type, _usageFlags);
}

inline
void UMat::create(Size _sz, int _type, UMatUsageFlags _usageFlags)
{
create(_sz.height, _sz.width, _type, _usageFlags);
}

inline
void UMat::addref()
{
if( u )
CV_XADD(&(u->urefcount), 1);
}

inline void UMat::release()
{
if( u && CV_XADD(&(u->urefcount), -1) == 1 )
deallocate();
for(int i = 0; i < dims; i++)
size.p[i] = 0;
u = 0;
}

inline
UMat UMat::operator()( Range _rowRange, Range _colRange ) const
{

@@ -3904,23 +3392,6 @@ size_t UMat::step1(int i) const
return step.p[i] / elemSize1();
}

inline
bool UMat::empty() const
{
return u == 0 || total() == 0 || dims == 0;
}

inline
size_t UMat::total() const
{
if( dims <= 2 )
return (size_t)rows * cols;
size_t p = 1;
for( int i = 0; i < dims; i++ )
p *= size[i];
return p;
}

#ifdef CV_CXX_MOVE_SEMANTICS

inline

@@ -4018,10 +3489,18 @@ inline void UMatData::markDeviceCopyObsolete(bool flag)

//! @endcond

static inline
void swap(MatExpr& a, MatExpr& b) { a.swap(b); }

} //cv

#ifdef _MSC_VER
#pragma warning( pop )
#endif

#ifdef CV_DISABLE_CLANG_ENUM_WARNINGS
#undef CV_DISABLE_CLANG_ENUM_WARNINGS
#pragma clang diagnostic pop
#endif

#endif
@@ -151,7 +151,16 @@ public:
static Matx ones();
static Matx eye();
static Matx diag(const diag_type& d);
/** @brief Generates uniformly distributed random numbers
@param a Range boundary.
@param b The other range boundary (boundaries don't have to be ordered, the lower boundary is inclusive,
the upper one is exclusive).
*/
static Matx randu(_Tp a, _Tp b);
/** @brief Generates normally distributed random numbers
@param a Mean value.
@param b Standard deviation.
*/
static Matx randn(_Tp a, _Tp b);

//! dot product computed with the default precision
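A minimal sketch of the new random generators declared above, following their documented parameter meanings:
@code
#include <opencv2/core.hpp>

void smallRandomMatrices()
{
    cv::Matx33f u = cv::Matx33f::randu(0.f, 1.f);   // uniform: lower bound inclusive, upper exclusive
    cv::Matx33f n = cv::Matx33f::randn(0.f, 1.f);   // normal: mean 0, standard deviation 1
    (void)u; (void)n;
}
@endcode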
@@ -391,6 +400,10 @@ public:
const _Tp& operator ()(int i) const;
_Tp& operator ()(int i);

#ifdef CV_CXX11
Vec<_Tp, cn>& operator=(const Vec<_Tp, cn>& rhs) = default;
#endif

Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_AddOp);
Vec(const Matx<_Tp, cn, 1>& a, const Matx<_Tp, cn, 1>& b, Matx_SubOp);
template<typename _T2> Vec(const Matx<_Tp, cn, 1>& a, _T2 alpha, Matx_ScaleOp);

@@ -1275,6 +1288,34 @@ Matx<_Tp, m, n> operator * (double alpha, const Matx<_Tp, m, n>& a)
return Matx<_Tp, m, n>(a, alpha, Matx_ScaleOp());
}

template<typename _Tp, int m, int n> static inline
Matx<_Tp, m, n>& operator /= (Matx<_Tp, m, n>& a, float alpha)
{
for( int i = 0; i < m*n; i++ )
a.val[i] = a.val[i] / alpha;
return a;
}

template<typename _Tp, int m, int n> static inline
Matx<_Tp, m, n>& operator /= (Matx<_Tp, m, n>& a, double alpha)
{
for( int i = 0; i < m*n; i++ )
a.val[i] = a.val[i] / alpha;
return a;
}

template<typename _Tp, int m, int n> static inline
Matx<_Tp, m, n> operator / (const Matx<_Tp, m, n>& a, float alpha)
{
return Matx<_Tp, m, n>(a, 1.f/alpha, Matx_ScaleOp());
}

template<typename _Tp, int m, int n> static inline
Matx<_Tp, m, n> operator / (const Matx<_Tp, m, n>& a, double alpha)
{
return Matx<_Tp, m, n>(a, 1./alpha, Matx_ScaleOp());
}

template<typename _Tp, int m, int n> static inline
Matx<_Tp, m, n> operator - (const Matx<_Tp, m, n>& a)
{
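A short usage sketch of the scalar division operators added in the hunk above:
@code
#include <opencv2/core.hpp>

void scaleDown()
{
    cv::Matx22f m(2.f, 4.f, 6.f, 8.f);
    m /= 2.f;                      // in-place element-wise division
    cv::Matx22f q = m / 2.0;       // returns a scaled copy via Matx_ScaleOp
    (void)q;
}
@endcode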
@@ -70,7 +70,7 @@ class CV_EXPORTS Image2D;
class CV_EXPORTS_W_SIMPLE Device
{
public:
CV_WRAP Device();
CV_WRAP Device() CV_NOEXCEPT;
explicit Device(void* d);
Device(const Device& d);
Device& operator = (const Device& d);

@@ -238,7 +238,7 @@ protected:
class CV_EXPORTS Context
{
public:
Context();
Context() CV_NOEXCEPT;
explicit Context(int dtype);
~Context();
Context(const Context& c);

@@ -269,7 +269,7 @@ public:
class CV_EXPORTS Platform
{
public:
Platform();
Platform() CV_NOEXCEPT;
~Platform();
Platform(const Platform& p);
Platform& operator = (const Platform& p);

@@ -324,7 +324,7 @@ void initializeContextFromHandle(Context& ctx, void* platform, void* context, vo
class CV_EXPORTS Queue
{
public:
Queue();
Queue() CV_NOEXCEPT;
explicit Queue(const Context& c, const Device& d=Device());
~Queue();
Queue(const Queue& q);

@@ -350,7 +350,7 @@ class CV_EXPORTS KernelArg
public:
enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 };
KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0);
KernelArg();
KernelArg() CV_NOEXCEPT;

static KernelArg Local(size_t localMemSize)
{ return KernelArg(LOCAL, 0, 1, 1, 0, localMemSize); }

@@ -387,7 +387,7 @@ public:
class CV_EXPORTS Kernel
{
public:
Kernel();
Kernel() CV_NOEXCEPT;
Kernel(const char* kname, const Program& prog);
Kernel(const char* kname, const ProgramSource& prog,
const String& buildopts = String(), String* errmsg=0);

@@ -597,7 +597,7 @@ protected:
class CV_EXPORTS Program
{
public:
Program();
Program() CV_NOEXCEPT;
Program(const ProgramSource& src,
const String& buildflags, String& errmsg);
Program(const Program& prog);

@@ -642,7 +642,7 @@ class CV_EXPORTS ProgramSource
public:
typedef uint64 hash_t; // deprecated

ProgramSource();
ProgramSource() CV_NOEXCEPT;
explicit ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash);
explicit ProgramSource(const String& prog); // deprecated
explicit ProgramSource(const char* prog); // deprecated

@@ -711,7 +711,7 @@ protected:
class CV_EXPORTS PlatformInfo
{
public:
PlatformInfo();
PlatformInfo() CV_NOEXCEPT;
explicit PlatformInfo(void* id);
~PlatformInfo();

@@ -720,7 +720,12 @@ public:

String name() const;
String vendor() const;

/// See CL_PLATFORM_VERSION
String version() const;
int versionMajor() const;
int versionMinor() const;

int deviceNumber() const;
void getDevice(Device& device, int d) const;

@@ -771,7 +776,7 @@ CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const Str
class CV_EXPORTS Image2D
{
public:
Image2D();
Image2D() CV_NOEXCEPT;

/**
@param src UMat object from which to get image properties and data
@@ -47,6 +47,23 @@ static std::string bytesToStringRepr(size_t value)
s = s.substr(0, s.size() - 1);
return s;
}

static String getDeviceTypeString(const cv::ocl::Device& device)
{
if (device.type() == cv::ocl::Device::TYPE_CPU) {
return "CPU";
}

if (device.type() == cv::ocl::Device::TYPE_GPU) {
if (device.hostUnifiedMemory()) {
return "iGPU";
} else {
return "dGPU";
}
}

return "unknown";
}
} // namespace

static void dumpOpenCLInformation()

@@ -64,46 +81,36 @@ static void dumpOpenCLInformation()

std::vector<PlatformInfo> platforms;
cv::ocl::getPlatfomsInfo(platforms);
if (platforms.size() > 0)
{
DUMP_MESSAGE_STDOUT("OpenCL Platforms: ");
for (size_t i = 0; i < platforms.size(); i++)
{
const PlatformInfo* platform = &platforms[i];
DUMP_MESSAGE_STDOUT("    " << platform->name().c_str());
Device current_device;
for (int j = 0; j < platform->deviceNumber(); j++)
{
platform->getDevice(current_device, j);
const char* deviceTypeStr = current_device.type() == Device::TYPE_CPU
? ("CPU") : (current_device.type() == Device::TYPE_GPU ? current_device.hostUnifiedMemory() ? "iGPU" : "dGPU" : "unknown");
DUMP_MESSAGE_STDOUT( "        " << deviceTypeStr << ": " << current_device.name().c_str() << " (" << current_device.version().c_str() << ")");
DUMP_CONFIG_PROPERTY( cv::format("cv_ocl_platform_%d_device_%d", (int)i, (int)j ),
cv::format("(Platform=%s)(Type=%s)(Name=%s)(Version=%s)",
platform->name().c_str(), deviceTypeStr, current_device.name().c_str(), current_device.version().c_str()) );
}
}
}
else
if (platforms.empty())
{
DUMP_MESSAGE_STDOUT("OpenCL is not available");
DUMP_CONFIG_PROPERTY("cv_ocl", "not available");
return;
}

DUMP_MESSAGE_STDOUT("OpenCL Platforms: ");
for (size_t i = 0; i < platforms.size(); i++)
{
const PlatformInfo* platform = &platforms[i];
DUMP_MESSAGE_STDOUT("    " << platform->name());
Device current_device;
for (int j = 0; j < platform->deviceNumber(); j++)
{
platform->getDevice(current_device, j);
String deviceTypeStr = getDeviceTypeString(current_device);
DUMP_MESSAGE_STDOUT( "        " << deviceTypeStr << ": " << current_device.name() << " (" << current_device.version() << ")");
DUMP_CONFIG_PROPERTY( cv::format("cv_ocl_platform_%d_device_%d", (int)i, j ),
cv::format("(Platform=%s)(Type=%s)(Name=%s)(Version=%s)",
platform->name().c_str(), deviceTypeStr.c_str(), current_device.name().c_str(), current_device.version().c_str()) );
}
}
const Device& device = Device::getDefault();
if (!device.available())
CV_Error(Error::OpenCLInitError, "OpenCL device is not available");

DUMP_MESSAGE_STDOUT("Current OpenCL device: ");

#if 0
DUMP_MESSAGE_STDOUT("    Platform = " << device.getPlatform().name());
DUMP_CONFIG_PROPERTY("cv_ocl_current_platformName", device.getPlatform().name());
#endif

const char* deviceTypeStr = device.type() == Device::TYPE_CPU
? ("CPU") : (device.type() == Device::TYPE_GPU ? device.hostUnifiedMemory() ? "iGPU" : "dGPU" : "unknown");
String deviceTypeStr = getDeviceTypeString(device);
DUMP_MESSAGE_STDOUT("    Type = " << deviceTypeStr);
DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceType", deviceTypeStr);

@@ -156,7 +163,7 @@ static void dumpOpenCLInformation()
}
pos = pos2 + 1;
}
DUMP_CONFIG_PROPERTY("cv_ocl_current_extensions", extensionsStr.c_str());
DUMP_CONFIG_PROPERTY("cv_ocl_current_extensions", extensionsStr);

const char* haveAmdBlasStr = haveAmdBlas() ? "Yes" : "No";
DUMP_MESSAGE_STDOUT("    Has AMD Blas = " << haveAmdBlasStr);