Hog源码注释(hog.cpp的注解)
生活随笔
收集整理的這篇文章主要介紹了
Hog源码注释(hog.cpp的注解)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
【原文:http://blog.csdn.net/pp5576155/article/details/7029699】
首先關于HOG算法:
#include "_cvaux.h"/*****************************************************************************************
struct CV_EXPORTS HOGDescriptor
{
public:
enum { L2Hys=0 };
HOGDescriptor() : winSize(64,128), blockSize(16,16), blockStride(8,8),
cellSize(8,8), nbins(9), derivAperture(1), winSigma(-1),
histogramNormType(L2Hys), L2HysThreshold(0.2), gammaCorrection(true)
{}
HOGDescriptor(Size _winSize, Size _blockSize, Size _blockStride,
Size _cellSize, int _nbins, int _derivAperture=1, double _winSigma=-1,
int _histogramNormType=L2Hys, double _L2HysThreshold=0.2, bool _gammaCorrection=false)
: winSize(_winSize), blockSize(_blockSize), blockStride(_blockStride), cellSize(_cellSize),
nbins(_nbins), derivAperture(_derivAperture), winSigma(_winSigma),
histogramNormType(_histogramNormType), L2HysThreshold(_L2HysThreshold),
gammaCorrection(_gammaCorrection)
{}
HOGDescriptor(const String& filename)
{
load(filename);
}
virtual ~HOGDescriptor() {}
size_t getDescriptorSize() const;
bool checkDetectorSize() const;
double getWinSigma() const;
virtual void setSVMDetector(const vector<float>& _svmdetector);
virtual bool load(const String& filename, const String& objname=String());
virtual void save(const String& filename, const String& objname=String()) const;
virtual void compute(const Mat& img,
vector<float>& descriptors,
Size winStride=Size(), Size padding=Size(),
const vector<Point>& locations=vector<Point>()) const;
virtual void detect(const Mat& img, vector<Point>& foundLocations,
double hitThreshold=0, Size winStride=Size(),
Size padding=Size(),
const vector<Point>& searchLocations=vector<Point>()) const;
virtual void detectMultiScale(const Mat& img, vector<Rect>& foundLocations,
double hitThreshold=0, Size winStride=Size(),
Size padding=Size(), double scale=1.05,
int groupThreshold=2) const;
//Mat& angleOfs,與后文Mat& qangle不一致,懷疑是筆誤,由于qangle與angleOfs有不同含義,盡量改過來
virtual void computeGradient(const Mat& img, Mat& grad, Mat& angleOfs,
Size paddingTL=Size(), Size paddingBR=Size()) const;
static vector<float> getDefaultPeopleDetector();
Size winSize ;//窗口大小
Size blockSize ;//Block大小
Size blockStride ;//block每次移動寬度包括水平和垂直兩個方向
Size cellSize ;//Cell單元大小
int nbins ;//直方圖bin數目
int derivAperture ;//不知道什么用
double winSigma ;//高斯函數的方差
int histogramNormType ;//直方圖歸一化類型,具體見論文
double L2HysThreshold ;//L2Hys化中限制最大值為0.2
bool gammaCorrection ;//是否Gamma校正
vector<float> svmDetector ;//檢測算子
};
**********************************************************************************/
namespace cv
{
// Returns the number of floats in the descriptor of one detection window:
// nbins * (cells per block) * (blocks per window).
// Asserts that the block is a whole number of cells and that the window is
// tiled exactly by blocks placed at blockStride steps.
size_t HOGDescriptor::getDescriptorSize() const
{
    // Geometry sanity checks.
    CV_Assert(blockSize.width % cellSize.width == 0 &&
        blockSize.height % cellSize.height == 0);
    CV_Assert((winSize.width - blockSize.width) % blockStride.width == 0 &&
        (winSize.height - blockSize.height) % blockStride.height == 0 );

    const int cellsAcross = blockSize.width/cellSize.width;
    const int cellsDown = blockSize.height/cellSize.height;
    const int blocksAcross = (winSize.width - blockSize.width)/blockStride.width + 1;
    const int blocksDown = (winSize.height - blockSize.height)/blockStride.height + 1;

    // With the default geometry (64x128 window, 16x16 block, 8x8 stride and cell, 9 bins)
    // this is 9*2*2*7*15 = 3780. checkDetectorSize() also accepts a detector with one
    // extra trailing element.
    size_t total = (size_t)nbins;
    total *= cellsAcross;
    total *= cellsDown;
    total *= blocksAcross;
    total *= blocksDown;
    return total;
}
// Returns the sigma of the Gaussian window applied over a block.
// A non-negative winSigma is used as is; otherwise (the default is -1) the value is
// derived from the block size as (width + height)/8, i.e. 4 for a 16x16 block.
double HOGDescriptor::getWinSigma() const
{
    if( winSigma >= 0 )
        return winSigma;
    return (blockSize.width + blockSize.height)/8.;
}
bool HOGDescriptor::checkDetectorSize() const
{
//size_t:unsigned int
size_t detectorSize = svmDetector.size(), descriptorSize = getDescriptorSize();
//三種情況任意一種為true則表達(dá)式為true,實(shí)際上是最后一種
return detectorSize == 0 ||
detectorSize == descriptorSize ||
detectorSize == descriptorSize + 1;
}
// Stores a copy of the given detector coefficients and then asserts that their
// count is compatible with the current descriptor geometry (see checkDetectorSize()).
// Note that the assignment happens before the check.
void HOGDescriptor::setSVMDetector(const vector<float>& _svmDetector)
{
svmDetector = _svmDetector;
CV_Assert( checkDetectorSize() );
}
bool HOGDescriptor::load(const String& filename, const String& objname)
{
//XML/YML文件存儲(chǔ)
FileStorage fs(filename, FileStorage::READ);
//objname為空,!1=0,選擇fs.getFirstTopLevelNode();否則為fs[objname]
//注意到FileStorage中[]重載了:FileNode operator[](const string& nodename)(returns the top-level node by name? )
FileNode obj = !objname.empty() ? fs[objname] : fs.getFirstTopLevelNode();
if( !obj.isMap() )
return false;
FileNodeIterator it = obj["winSize"].begin();
it >> winSize.width >> winSize.height;
it = obj["blockSize"].begin();
it >> blockSize.width >> blockSize.height;
it = obj["blockStride"].begin();
it >> blockStride.width >> blockStride.height;
it = obj["cellSize"].begin();
it >> cellSize.width >> cellSize.height;
obj["nbins"] >> nbins;
obj["derivAperture"] >> derivAperture;
obj["winSigma"] >> winSigma;
obj["histogramNormType"] >> histogramNormType;
obj["L2HysThreshold"] >> L2HysThreshold;
obj["gammaCorrection"] >> gammaCorrection;
FileNode vecNode = obj["SVMDetector"];
if( vecNode.isSeq() )
{
vecNode >> svmDetector;
CV_Assert(checkDetectorSize());
}
return true;
}
void HOGDescriptor::save(const String& filename, const String& objName) const
{
FileStorage fs(filename, FileStorage::WRITE);
//空的對(duì)象名則取默認(rèn)名,輸出有一定格式,對(duì)象名后緊接{
fs << (!objName.empty() ? objName : FileStorage::getDefaultObjectName(filename)) << "{";
//之后依次為:
fs << "winSize" << winSize
<< "blockSize" << blockSize
<< "blockStride" << blockStride
<< "cellSize" << cellSize
<< "nbins" << nbins
<< "derivAperture" << derivAperture
<< "winSigma" << getWinSigma()
<< "histogramNormType" << histogramNormType
<< "L2HysThreshold" << L2HysThreshold
<< "gammaCorrection" << gammaCorrection;
if( !svmDetector.empty() )
fs << "SVMDetector" << "[:" << svmDetector << "]";
//注意還要輸出"}"
fs << "}";
}
//img:原始圖像
//grad:記錄每個(gè)像素所屬bin對(duì)應(yīng)的權(quán)重的矩陣,為幅值乘以權(quán)值
//這個(gè)權(quán)值是關(guān)鍵,也很復(fù)雜:包括高斯權(quán)重,三次插值的權(quán)重,在本函數(shù)中先值考慮幅值和相鄰bin間的插值權(quán)重
//qangle:記錄每個(gè)像素角度所屬的bin序號(hào)的矩陣,均為2通道,為了線(xiàn)性插值
//paddingTL:Top和Left擴(kuò)充像素?cái)?shù)
//paddingBR:類(lèi)似同上
//功能:計(jì)算img經(jīng)擴(kuò)張后的圖像中每個(gè)像素的梯度和角度
void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
Size paddingTL, Size paddingBR) const
{
//先判斷是否為單通道的灰度或者3通道的圖像
CV_Assert( img.type() == CV_8U || img.type() == CV_8UC3 );
//計(jì)算gradient的圖的大小,由64*128==》112*160,則會(huì)產(chǎn)生5*7=35個(gè)窗口(windowstride:8)
//每個(gè)窗口105個(gè)block,105*36=3780維特征向量
//paddingTL.width=16,paddingTL.height=24
Size gradsize(img.cols + paddingTL.width + paddingBR.width,
img.rows + paddingTL.height + paddingBR.height);
//注意grad和qangle是2通道的矩陣,為3D-trilinear插值中的orientation維度,另兩維為坐標(biāo)x與y ?
grad.create(gradsize, CV_32FC2); // <magnitude*(1-alpha), magnitude*alpha>
qangle.create(gradsize, CV_8UC2); // [0..nbins-1] - quantized gradient orientation
//wholeSize為parent matrix大小,不是擴(kuò)展后gradsize的大小
//roiofs即為img在parent matrix中的偏置
//對(duì)于正樣本img=parent matrix;但對(duì)于負(fù)樣本img是從parent img中抽取的10個(gè)隨機(jī)位置
//至于OpenCv具體是怎么操作,使得img和parent img相聯(lián)系,不是很了解
//wholeSize與roiofs僅在padding時(shí)有用,可以不管,就認(rèn)為傳入的img==parent img,是否是從parent img中取出無(wú)所謂 ?
Size wholeSize;
Point roiofs;
img.locateROI(wholeSize, roiofs);
int i, x, y;
int cn = img.channels();
//產(chǎn)生1行256列的向量,lut為列向量頭地址 ?
Mat_<float> _lut(1, 256);
const float* lut = &_lut(0,0);
//gamma校正,作者的編程思路很有意思
//初看不知道這怎么會(huì)與圖像的gamma校正有關(guān)系,壓根img都沒(méi)出現(xiàn),看到后面大家會(huì)豁然開(kāi)朗的
if( gammaCorrection )
for( i = 0; i < 256; i++ )
_lut(0,i) = std::sqrt((float)i);
else
for( i = 0; i < 256; i++ )
_lut(0,i) = (float)i;
//開(kāi)辟空間存xmap和ymap,其中各占gradsize.width+2和gradsize.height+2空間
//+2是為了計(jì)算dx,dy時(shí)用[-1,0,1]算子,即使在擴(kuò)充圖像中,其邊緣計(jì)算梯度時(shí)還是要再額外加一個(gè)像素的
//作者很喜歡直接用內(nèi)存地址及之間的關(guān)系,初看是有點(diǎn)頭大的
//另外再說(shuō)說(shuō)xmap與ymap的作用:其引入是因?yàn)閕mg圖像需要擴(kuò)充到gradsize大小
//如果我們計(jì)算img中位于(-5,-6)像素時(shí),需要將基于img的(-5,-6)坐標(biāo),映射為基于grad和qangle的坐標(biāo)(xmap,ymap) ?
AutoBuffer<int> mapbuf(gradsize.width + gradsize.height + 4);
int* xmap = (int*)mapbuf + 1;
int* ymap = xmap + gradsize.width + 2;
// BORDER_REFLECT_101:(左插值)gfedcb|abcdefgh(原始像素)|gfedcba(右插值),一種插值模式 ?const int borderType = (int)BORDER_REFLECT_101;
//borderInterpolate函數(shù)完成兩項(xiàng)操作,一是利用插值擴(kuò)充img,二是返回x-paddingTL.width+roiofs.x映射后的坐標(biāo)xmap
//例如,ximg=x(取0)-paddingTL.width(取24)+roiofs.x(取0)=-24 ==>xmap[0]=0
//即img中x=-24,映射到grad中xmap=0,并且存在xmap[0]中,至于borderInterpolate的具體操作可以不必細(xì)究 ?
for( x = -1; x < gradsize.width + 1; x++ )
xmap[x] = borderInterpolate(x - paddingTL.width + roiofs.x,
wholeSize.width, borderType);
for( y = -1; y < gradsize.height + 1; y++ )
ymap[y] = borderInterpolate(y - paddingTL.height + roiofs.y,
wholeSize.height, borderType);
// x- & y- derivatives for the whole row
// 由于后面的循環(huán)是以行為單位,每次循環(huán)內(nèi)存重復(fù)使用,所以只要記錄一行的信息而不是整個(gè)矩陣 ?
int width = gradsize.width;
AutoBuffer<float> _dbuf(width*4);
float* dbuf = _dbuf;
//注意到內(nèi)存的連續(xù)性方便之后的編程 ?
Mat Dx(1, width, CV_32F, dbuf);
Mat Dy(1, width, CV_32F, dbuf + width);
Mat Mag(1, width, CV_32F, dbuf + width*2);
Mat Angle(1, width, CV_32F, dbuf + width*3);
int _nbins = nbins;
float angleScale = (float)(_nbins/CV_PI);//9/pi
for( y = 0; y < gradsize.height; y++ )
{
//指向每行的第一個(gè)元素,img.data為矩陣的第一個(gè)元素地址
const uchar* imgPtr = img.data + img.step*ymap[y];
const uchar* prevPtr = img.data + img.step*ymap[y-1];
const uchar* nextPtr = img.data + img.step*ymap[y+1];
float* gradPtr = (float*)grad.ptr(y);
uchar* qanglePtr = (uchar*)qangle.ptr(y);
//1通道
if( cn == 1 )
{
for( x = 0; x < width; x++ )
{
int x1 = xmap[x];
//imgPtr指向img第y行首元素,imgPtr[x]即表示第(x,y)像素,其亮度值位于0~255,對(duì)應(yīng)lut[0]~lut[255]
//即若像素亮度為120,則對(duì)應(yīng)lut[120],若有g(shù)amma校正,lut[120]=sqrt(120)
//由于補(bǔ)充了虛擬像素,即在imgPtr[-1]無(wú)法表示gradsize中-1位置元素,而需要有個(gè)轉(zhuǎn)換
//imgPtr[-1-paddingTL.width+roiofs.x],即imgPtr[xmap[-1]],即gradsize中-1位置元素為img中xmap[-1]位置的元素 ?
dbuf[x] = (float)(lut[imgPtr[xmap[x+1]]] - lut[imgPtr[xmap[x-1]]]);
//由于內(nèi)存的連續(xù)性,隔width,即存Dy
dbuf[width + x] = (float)(lut[nextPtr[x1]] - lut[prevPtr[x1]]);
}
}
else
//3通道,3通道中取最大值
{
for( x = 0; x < width; x++ )
{
int x1 = xmap[x]*3;
const uchar* p2 = imgPtr + xmap[x+1]*3;
const uchar* p0 = imgPtr + xmap[x-1]*3;
float dx0, dy0, dx, dy, mag0, mag;
dx0 = lut[p2[2]] - lut[p0[2]];
dy0 = lut[nextPtr[x1+2]] - lut[prevPtr[x1+2]];
mag0 = dx0*dx0 + dy0*dy0;
dx = lut[p2[1]] - lut[p0[1]];
dy = lut[nextPtr[x1+1]] - lut[prevPtr[x1+1]];
mag = dx*dx + dy*dy;
if( mag0 < mag )
{
dx0 = dx;
dy0 = dy;
mag0 = mag;
}
dx = lut[p2[0]] - lut[p0[0]];
dy = lut[nextPtr[x1]] - lut[prevPtr[x1]];
mag = dx*dx + dy*dy;
if( mag0 < mag )
{
dx0 = dx;
dy0 = dy;
mag0 = mag;
}
dbuf[x] = dx0;
dbuf[x+width] = dy0;
}
}
//函數(shù) cvCartToPolar 計(jì)算二維向量(x(I),y(I))的長(zhǎng)度,角度:?
//magnitude(I) = sqrt(x(I)2 + y(I)2),angle(I) = atan(y(I) / x(I)),注意屬于-pi/2~pi/2
cartToPolar( Dx, Dy, Mag, Angle, false );
for( x = 0; x < width; x++ )
{
float mag = dbuf[x+width*2];
float angle = dbuf[x+width*3]*angleScale - 0.5f;//-5<=angle<=4
//判斷angle屬于哪個(gè)bin
int hidx = cvFloor(angle);
angle -= hidx;
//hidx=-5~-1===>4~8
if( hidx < 0 )
hidx += _nbins;
else if( hidx >= _nbins )
hidx -= _nbins;
//檢測(cè)是否<9
assert( (unsigned)hidx < (unsigned)_nbins );
qanglePtr[x*2] = (uchar)hidx;
hidx++;
//hidx = hidx & 1111 1111 當(dāng)hidx<nbins,即hidx=hidx
//hidx = hidx & 0000 0000 當(dāng)hidx>=nbins,即hidx=0
//注意到nbins=9時(shí),hidx最大值只為8 ?
hidx &= hidx < _nbins ? -1 : 0;
//qangle兩通道分別存放相鄰的兩個(gè)bin
qanglePtr[x*2+1] = (uchar)hidx;
//幅度,注意此時(shí)的0<angle<1,由于hidx = cvFloor(angle),angle -= hidx; ?
gradPtr[x*2] = mag*(1.f - angle);
gradPtr[x*2+1] = mag*angle;
}
}
}
// Working storage for HOG computation over one image: the gradient/orientation
// images plus lookup tables that let a block histogram be produced with flat loops.
// With the default geometry a window holds 105 blocks of 36 histogram values each.
struct HOGCache
{
    // Placement of one block inside a detection window.
    struct BlockData
    {
        BlockData() : histOfs(0), imgOffset() {}
        // Offset (in floats) of this block's histogram inside the window descriptor.
        int histOfs;
        // Top-left corner of the block relative to the window's top-left corner.
        Point imgOffset;
    };

    // Precomputed data for one pixel position inside a block (filled by init()).
    struct PixData
    {
        // Element offsets of this pixel in grad / qangle, relative to the block's
        // top-left pixel; computed as (cols*row + col)*2 because both images have 2 channels.
        size_t gradOfs;
        size_t qangleOfs;
        // Offsets (in floats, inside the block histogram) of the cell histograms this
        // pixel contributes to. A pixel contributes to 1, 2 or 4 cells depending on where
        // it lies in the block; unused entries are 0.
        int histOfs[4];
        // Spatial interpolation weight of the pixel for each of those cells;
        // unused entries are 0.
        float histWeights[4];
        // Weight of the pixel under the Gaussian window centred on the block.
        float gradWeight;
    };

    HOGCache();
    HOGCache(const HOGDescriptor* descriptor,
             const Mat& img, Size paddingTL, Size paddingBR,
             bool useCache, Size cacheStride);
    virtual ~HOGCache() {}
    virtual void init(const HOGDescriptor* descriptor,
                      const Mat& img, Size paddingTL, Size paddingBR,
                      bool useCache, Size cacheStride);

    // Number of window positions horizontally and vertically for the given image size and stride.
    Size windowsInImage(Size imageSize, Size winStride) const;
    // Rectangle of the idx-th window (row-major order) for the given image size and stride.
    Rect getWindow(Size imageSize, Size winStride, int idx) const;

    // Returns the normalized histogram of the block whose top-left corner is pt
    // (in unpadded image coordinates). buf is the destination used when the block
    // cache is disabled.
    const float* getBlock(Point pt, float* buf);
    virtual void normalizeBlockHistogram(float* histogram) const;

    vector<PixData> pixData;
    vector<BlockData> blockData;

    // --- block cache: avoids recomputing blocks shared by overlapping windows ---
    // Whether computed block histograms are kept for reuse.
    bool useCache;
    // For each cache row, the y coordinate of the block row currently stored in it (-1: none).
    vector<int> ymaxCached;
    Size winSize;
    // Grid step of cacheable block positions.
    Size cacheStride;
    // Blocks per window and cells per block (width x height).
    Size nblocks;
    Size ncells;
    // Floats per block histogram (cells per block * nbins).
    int blockHistogramSize;
    // While init() fills pixData: number of pixels contributing to 1, 2 and 4 cells.
    // At the end of init() they are turned into cumulative end indices into pixData.
    int count1;
    int count2;
    int count4;
    // Offset of the original image origin inside the padded gradient image (top-left padding).
    Point imgoffset;
    // Cached block histograms, and per-entry flags telling whether an entry is valid.
    Mat_<float> blockCache;
    Mat_<uchar> blockCacheFlags;

    // Outputs of HOGDescriptor::computeGradient for the padded image.
    Mat grad;
    Mat qangle;
    const HOGDescriptor* descriptor;
};
// Creates an empty cache: caching off, zero sizes/counters, no descriptor attached.
// init() must be called before the object is used.
HOGCache::HOGCache()
    : useCache(false),
      blockHistogramSize(0),
      count1(0),
      count2(0),
      count4(0),
      descriptor(0)
{
}
// Convenience constructor: forwards all arguments to init().
HOGCache::HOGCache(const HOGDescriptor* _descriptor,
const Mat& _img, Size _paddingTL, Size _paddingBR,
bool _useCache, Size _cacheStride)
{
init(_descriptor, _img, _paddingTL, _paddingBR, _useCache, _cacheStride);
}
// Prepares the cache for one image. No histogram is computed here; the function only
// builds the data getBlock() needs, in this order:
//   1. stores the descriptor / cache settings and computes grad and qangle for the
//      padded image via HOGDescriptor::computeGradient;
//   2. derives nblocks, ncells and blockHistogramSize from the descriptor geometry;
//   3. when _useCache is set, allocates blockCache, blockCacheFlags and ymaxCached;
//   4. fills pixData: for every pixel position in a block, its offsets into grad/qangle,
//      the cell histograms it contributes to and the corresponding weights;
//   5. fills blockData: for every block of a window, its histogram offset in the window
//      descriptor and its position inside the window.
void HOGCache::init(const HOGDescriptor* _descriptor,
const Mat& _img, Size _paddingTL, Size _paddingBR,
bool _useCache, Size _cacheStride)
{
descriptor = _descriptor;
cacheStride = _cacheStride;
useCache = _useCache;
descriptor->computeGradient(_img, grad, qangle, _paddingTL, _paddingBR);
imgoffset = _paddingTL;// position of the image origin inside the padded gradient image
winSize = descriptor->winSize;// 64x128 with the default descriptor
Size blockSize = descriptor->blockSize;// 16x16 by default
Size blockStride = descriptor->blockStride;// 8x8 by default
Size cellSize = descriptor->cellSize;// 8x8 by default
Size winSize = descriptor->winSize;// local that shadows the member assigned above (same value)
int i, j, nbins = descriptor->nbins;// 9 by default
int rawBlockSize = blockSize.width*blockSize.height;// pixels per block (256 by default)
nblocks = Size((winSize.width - blockSize.width)/blockStride.width + 1,
(winSize.height - blockSize.height)/blockStride.height + 1);// 7x15 = 105 blocks by default
ncells = Size(blockSize.width/cellSize.width, blockSize.height/cellSize.height);// 2x2 cells by default
blockHistogramSize = ncells.width*ncells.height*nbins;// 2*2*9 = 36 by default
// Block cache (allocated only when useCache is set).
// Overlapping detection windows share most of their blocks, so getBlock() keeps each
// computed block histogram and returns it again when the same block is requested.
//  - columns: one entry per cacheStride step across the padded image width (grad.cols),
//    each entry being blockHistogramSize floats wide;
//  - rows: (winSize.height/cacheStride.height)+1. getBlock() maps a block's y position to
//    a row with (y/cacheStride.height) % rows, so rows are reused cyclically;
//  - blockCacheFlags marks which entries are valid, and ymaxCached[r] records which y
//    position currently occupies row r (-1 = nothing stored yet).
if( useCache )
{
// grad.cols = img.cols + paddingTL.width + paddingBR.width (set by computeGradient)
Size cacheSize((grad.cols - blockSize.width)/cacheStride.width+1,
(winSize.height/cacheStride.height)+1);
blockCache.create(cacheSize.height, cacheSize.width*blockHistogramSize);
blockCacheFlags.create(cacheSize);
// NOTE: this size_t i shadows the outer int i inside this scope only.
size_t i, cacheRows = blockCache.rows;
ymaxCached.resize(cacheRows);
for( i = 0; i < cacheRows; i++ )
ymaxCached[i] = -1;
}
Mat_<float> weights(blockSize);
// sigma is 4 for the default 16x16 block when winSigma is left at -1 (see getWinSigma())
float sigma = (float)descriptor->getWinSigma();
float scale = 1.f/(sigma*sigma*2);
// 2-D Gaussian window over the block, centred on the block centre:
// weights(i,j) = exp(-(di^2 + dj^2)/(2*sigma^2))
for(i = 0; i < blockSize.height; i++)
for(j = 0; j < blockSize.width; j++)
{
float di = i - blockSize.height*0.5f;
float dj = j - blockSize.width*0.5f;
weights(i,j) = std::exp(-(di*di + dj*dj)*scale);
}
blockData.resize(nblocks.width*nblocks.height);// one entry per block of a window
pixData.resize(rawBlockSize*3);// three segments of rawBlockSize: 1-cell, 2-cell and 4-cell pixels
// Initialize 2 lookup tables, pixData & blockData.
// Here is why:
//
// The detection algorithm runs in 4 nested loops (at each pyramid layer):
// loop over the windows within the input image
// loop over the blocks within each window
// loop over the cells within each block
// loop over the pixels in each cell
//
// As each of the loops runs over a 2-dimensional array,
// we could get 8(!) nested loops in total, which is very-very slow.
//
// To speed the things up, we do the following:
// 1. loop over windows is unrolled in the HOGDescriptor::{compute|detect} methods;
// inside we compute the current search window using getWindow() method.
// Yes, it involves some overhead (function call + couple of divisions),
// but it's tiny in fact.
// 2. loop over the blocks is also unrolled. Inside we use pre-computed blockData[j]
// to set up gradient and histogram pointers.
// 3. loops over cells and pixels in each cell are merged
// (since there is no overlap between cells, each pixel in the block is processed once)
// and also unrolled. Inside we use PixData[k] to access the gradient values and
// update the histogram
// In short: init() builds the lookup tables, and getWindow()/getBlock() consume them.
count1 = count2 = count4 = 0;
// Visit every pixel (column j, row i) of a block.
for( j = 0; j < blockSize.width; j++ )
for( i = 0; i < blockSize.height; i++ )
{
PixData* data = 0;
// Position of the pixel centre in units of cells, measured from the centre of cell 0.
float cellX = (j+0.5f)/cellSize.width - 0.5f;
float cellY = (i+0.5f)/cellSize.height - 0.5f;
// The two neighbouring cell indices along each axis ...
int icellX0 = cvFloor(cellX);
int icellY0 = cvFloor(cellY);
int icellX1 = icellX0 + 1, icellY1 = icellY0 + 1;
// ... and the fractional distance from the lower one (weight of the upper one).
cellX -= icellX0;
cellY -= icellY0;
// The (unsigned) casts make a negative index compare as a huge value, so each test is
// a single-comparison check for 0 <= index < ncells.
// For the default 16x16 block with 8x8 cells:
//   pixels 0..3   -> cells (-1, 0): only the upper neighbour exists
//   pixels 4..11  -> cells ( 0, 1): both neighbours exist
//   pixels 12..15 -> cells ( 1, 2): only the lower neighbour exists
// Combining both axes gives pixels that contribute to 4 cells (both axes have two
// neighbours), 2 cells (exactly one axis has two) or 1 cell (neither has).
// Cell histograms are laid out column-major: offset = (cellX*ncells.height + cellY)*nbins.
if( (unsigned)icellX0 < (unsigned)ncells.width &&
(unsigned)icellX1 < (unsigned)ncells.width )
{
if( (unsigned)icellY0 < (unsigned)ncells.height &&
(unsigned)icellY1 < (unsigned)ncells.height )
{
// Two neighbours on both axes: bilinear weights over four cells.
data = &pixData[rawBlockSize*2 + (count4++)];
data->histOfs[0] = (icellX0*ncells.height + icellY0)*nbins;
data->histWeights[0] = (1.f - cellX)*(1.f - cellY);
data->histOfs[1] = (icellX1*ncells.height + icellY0)*nbins;
data->histWeights[1] = cellX*(1.f - cellY);
data->histOfs[2] = (icellX0*ncells.height + icellY1)*nbins;
data->histWeights[2] = (1.f - cellX)*cellY;
data->histOfs[3] = (icellX1*ncells.height + icellY1)*nbins;
data->histWeights[3] = cellX*cellY;
}
else
{
// Two neighbours in x, one in y: interpolate between two horizontally adjacent cells.
data = &pixData[rawBlockSize + (count2++)];
// After this, icellY1 is the single valid row and cellY its weight: if the lower
// index is the valid one, use it and flip the weight; otherwise icellY1 is already
// the valid index with weight cellY.
if( (unsigned)icellY0 < (unsigned)ncells.height )
{
icellY1 = icellY0;
cellY = 1.f - cellY;
}
// Both cells share the same y weight; only the x weight differs between them.
data->histOfs[0] = (icellX0*ncells.height + icellY1)*nbins;
data->histWeights[0] = (1.f - cellX)*cellY;
data->histOfs[1] = (icellX1*ncells.height + icellY1)*nbins;
data->histWeights[1] = cellX*cellY;
data->histOfs[2] = data->histOfs[3] = 0;
data->histWeights[2] = data->histWeights[3] = 0;
}
}
else
{
// Only one valid neighbour in x: collapse to it (same trick as for y above).
if( (unsigned)icellX0 < (unsigned)ncells.width )
{
icellX1 = icellX0;
cellX = 1.f - cellX;
}
if( (unsigned)icellY0 < (unsigned)ncells.height &&
(unsigned)icellY1 < (unsigned)ncells.height )
{
// One neighbour in x, two in y: interpolate between two vertically adjacent cells.
data = &pixData[rawBlockSize + (count2++)];
data->histOfs[0] = (icellX1*ncells.height + icellY0)*nbins;
data->histWeights[0] = cellX*(1.f - cellY);
data->histOfs[1] = (icellX1*ncells.height + icellY1)*nbins;
data->histWeights[1] = cellX*cellY;
data->histOfs[2] = data->histOfs[3] = 0;
data->histWeights[2] = data->histWeights[3] = 0;
}
else
{
// One neighbour on both axes: the pixel contributes to a single cell.
data = &pixData[count1++];
if( (unsigned)icellY0 < (unsigned)ncells.height )
{
icellY1 = icellY0;
cellY = 1.f - cellY;
}
data->histOfs[0] = (icellX1*ncells.height + icellY1)*nbins;
data->histWeights[0] = cellX*cellY;
data->histOfs[1] = data->histOfs[2] = data->histOfs[3] = 0;
data->histWeights[1] = data->histWeights[2] = data->histWeights[3] = 0;
}
}
// Offsets relative to the block's top-left pixel; *2 because grad and qangle have 2 channels.
data->gradOfs = (grad.cols*i + j)*2;
data->qangleOfs = (qangle.cols*i + j)*2;
data->gradWeight = weights(i,j);
}
assert( count1 + count2 + count4 == rawBlockSize );// every pixel of the block was classified exactly once
// defragment pixData: move the 2-cell and 4-cell segments down so that the entries are
// contiguous in the order [1-cell | 2-cell | 4-cell]
for( j = 0; j < count2; j++ )
pixData[j + count1] = pixData[j + rawBlockSize];
for( j = 0; j < count4; j++ )
pixData[j + count1 + count2] = pixData[j + rawBlockSize*2];
// Turn the counts into cumulative end indices, as used by the loops in getBlock().
count2 += count1;
count4 += count2;
// initialize blockData (blocks are ordered column by column within the window)
for( j = 0; j < nblocks.width; j++ )
for( i = 0; i < nblocks.height; i++ )
{
BlockData& data = blockData[j*nblocks.height + i];
// offset of this block's histogram inside the window descriptor
data.histOfs = (j*nblocks.height + i)*blockHistogramSize;
data.imgOffset = Point(j*blockStride.width,i*blockStride.height);
}
}
// Computes (or fetches from the cache) the normalized histogram of one block.
// pt:  top-left corner of the block in unpadded image coordinates; imgoffset is added
//      internally to address the padded grad/qangle images.
// buf: destination for the histogram when the block cache is disabled.
// Returns a pointer to blockHistogramSize floats: buf when caching is off, otherwise a
// pointer into blockCache (buf is then left untouched and the caller copies if needed).
const float* HOGCache::getBlock(Point pt, float* buf)
{
float* blockHist = buf;
assert(descriptor != 0);
Size blockSize = descriptor->blockSize;
// convert from image coordinates to padded-image coordinates
pt += imgoffset;
// the whole block must lie inside the padded image
CV_Assert( (unsigned)pt.x <= (unsigned)(grad.cols - blockSize.width) &&
(unsigned)pt.y <= (unsigned)(grad.rows - blockSize.height) );
// Cache lookup; the cache layout is set up in init().
if( useCache )
{
// only positions on the cacheStride grid can be cached
CV_Assert( pt.x % cacheStride.width == 0 &&
pt.y % cacheStride.height == 0 );
// column = x grid index; row = y grid index modulo the number of cache rows
Point cacheIdx(pt.x/cacheStride.width,
(pt.y/cacheStride.height) % blockCache.rows);
// The cache row currently holds a different y position: invalidate the whole row
// and claim it for this y.
if( pt.y != ymaxCached[cacheIdx.y] )
{
Mat_<uchar> cacheRow = blockCacheFlags.row(cacheIdx.y);
cacheRow = (uchar)0;
ymaxCached[cacheIdx.y] = pt.y;
}
blockHist = &blockCache[cacheIdx.y][cacheIdx.x*blockHistogramSize];
uchar& computedFlag = blockCacheFlags(cacheIdx.y, cacheIdx.x);
// cache hit: the histogram is already there
if( computedFlag != 0 )
return blockHist;
computedFlag = (uchar)1; // set it at once, before actual computing
}
// After init(), count1/count2/count4 are cumulative end indices into pixData.
int k, C1 = count1, C2 = count2, C4 = count4;
// Pointers to the block's top-left pixel in grad and qangle (x*2: both have 2 channels).
const float* gradPtr = (const float*)(grad.data + grad.step*pt.y) + pt.x*2;
const uchar* qanglePtr = qangle.data + qangle.step*pt.y + pt.x*2;
CV_Assert( blockHist != 0 );
// clear the block histogram
for( k = 0; k < blockHistogramSize; k++ )
blockHist[k] = 0.f;
const PixData* _pixData = &pixData[0];
// Accumulate every pixel of the block. For each pixel: a[0]/a[1] are the magnitudes
// assigned to orientation bins h[0]/h[1], w is Gaussian weight * spatial weight, and
// hist points at the histogram of the target cell.
// Note the load-load-store-store pattern: both sums are formed before either is written.
// Pixels contributing to a single cell.
for( k = 0; k < C1; k++ )
{
const PixData& pk = _pixData[k];
const float* a = gradPtr + pk.gradOfs ;// this pixel's entry in grad
float w = pk.gradWeight*pk.histWeights[0];
const uchar* h = qanglePtr + pk.qangleOfs;
int h0 = h[0], h1 = h[1 ];// the two neighbouring orientation bins
float* hist = blockHist + pk.histOfs[0]; // histogram of the cell receiving the vote
float t0 = hist[h0] + a[0]*w;
float t1 = hist[h1] + a[1]*w;
hist[h0] = t0; hist[h1] = t1;
}
// Pixels contributing to two cells.
for( ; k < C2; k++ )
{
const PixData& pk = _pixData[k];
const float* a = gradPtr + pk.gradOfs;
float w, t0, t1, a0 = a[0], a1 = a[1];
const uchar* h = qanglePtr + pk.qangleOfs;
int h0 = h[0], h1 = h[1];
float* hist = blockHist + pk.histOfs[0];
w = pk.gradWeight*pk.histWeights[0];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
hist = blockHist + pk.histOfs[1];
w = pk.gradWeight*pk.histWeights[1];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
}
// Pixels contributing to four cells.
for( ; k < C4; k++ )
{
const PixData& pk = _pixData[k];
const float* a = gradPtr + pk.gradOfs;
float w, t0, t1, a0 = a[0], a1 = a[1];
const uchar* h = qanglePtr + pk.qangleOfs;
int h0 = h[0], h1 = h[1];
float* hist = blockHist + pk.histOfs[0];
w = pk.gradWeight*pk.histWeights[0];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
hist = blockHist + pk.histOfs[1];
w = pk.gradWeight*pk.histWeights[1];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
hist = blockHist + pk.histOfs[2];
w = pk.gradWeight*pk.histWeights[2];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
hist = blockHist + pk.histOfs[3];
w = pk.gradWeight*pk.histWeights[3];
t0 = hist[h0] + a0*w;
t1 = hist[h1] + a1*w;
hist[h0] = t0; hist[h1] = t1;
}
normalizeBlockHistogram(blockHist);
return blockHist;
}
// Normalizes one block histogram in place (blockHistogramSize floats):
//   1. scale by 1/(L2 norm + 0.1*size),
//   2. clip every value at descriptor->L2HysThreshold,
//   3. scale again by 1/(L2 norm of the clipped values + 1e-3).
void HOGCache::normalizeBlockHistogram(float* _hist) const
{
    float* bins = _hist;
    const size_t nBins = blockHistogramSize;
    size_t k;

    // First pass: squared L2 norm of the raw histogram.
    float energy = 0;
    for( k = 0; k < nBins; k++ )
        energy += bins[k]*bins[k];

    // The nBins*0.1 term keeps the division well defined for an all-zero histogram.
    float gain = 1.f/(std::sqrt(energy)+nBins*0.1f);
    const float clip = (float)descriptor->L2HysThreshold;  // 0.2 by default

    // Second pass: scale, clip, and accumulate the norm of the clipped values.
    energy = 0;
    for( k = 0; k < nBins; k++ )
    {
        bins[k] = std::min(bins[k]*gain, clip);
        energy += bins[k]*bins[k];
    }

    // Third pass: renormalize the clipped histogram.
    gain = 1.f/(std::sqrt(energy)+1e-3f);
    for( k = 0; k < nBins; k++ )
        bins[k] *= gain;
}
// Returns how many window positions fit horizontally (width) and vertically (height)
// in an image of the given size when the window moves in winStride steps.
Size HOGCache::windowsInImage(Size imageSize, Size winStride) const
{
    const int countX = (imageSize.width - winSize.width)/winStride.width + 1;
    const int countY = (imageSize.height - winSize.height)/winStride.height + 1;
    return Size(countX, countY);
}
// Returns the rectangle of the idx-th detection window, windows being numbered row by
// row (left to right, then top to bottom) for the given image size and window stride.
Rect HOGCache::getWindow(Size imageSize, Size winStride, int idx) const
{
    const int windowsPerRow = (imageSize.width - winSize.width)/winStride.width + 1;
    const int row = idx / windowsPerRow;  // integer division
    const int col = idx % windowsPerRow;
    return Rect( col*winStride.width, row*winStride.height, winSize.width, winSize.height );
}
//img:待檢測(cè)或計(jì)算的圖像
//descriptors:Hog描述結(jié)構(gòu)
//winStride:窗口移動(dòng)步伐
//padding:擴(kuò)充圖像相關(guān)尺寸
//locations:對(duì)于正樣本可以直接取(0,0),負(fù)樣本為隨機(jī)產(chǎn)生合理坐標(biāo)范圍內(nèi)的點(diǎn)坐標(biāo)
void HOGDescriptor::compute(const Mat& img, vector<float>& descriptors,
Size winStride, Size padding,
const vector<Point>& locations) const
{
//若winStride.width=0,winStride.height=0,取(8,8)
if( winStride == Size() )
winStride = cellSize;
//gcd(a,b)可認(rèn)為取小的
//默認(rèn)的winStride=blockStride,暫時(shí)忽視
Size cacheStride(gcd(winStride.width, blockStride.width),
gcd(winStride.height, blockStride.height));
//正樣本只有一個(gè)窗口,如果未擴(kuò)充
//負(fù)樣本按論文中所說(shuō)會(huì)隨機(jī)產(chǎn)生10副圖,若未擴(kuò)充則會(huì)有10個(gè)窗口 ?
size_t nwindows = locations.size();
//alignSize(size_t sz, int n)
返回n的倍數(shù)中不小于sz的最小數(shù),對(duì)padding.width進(jìn)行修正
//由默認(rèn)參數(shù)有cacheStride=blockStride=(8,8),padding.width=24,padding.height=16,所以也不需要修正,可忽視 ?
padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);
padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2);
//HOGCache(const HOGDescriptor* _descriptor,const Mat& _img, Size _paddingTL, Size _paddingBR,bool _useCache, Size _cacheStride)
//nwindows==0表示useCache=1
HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);
//當(dāng)nwidows=0時(shí)擴(kuò)充圖像,之后再計(jì)算共有多少窗口area()=size.width*size.height,windowsInImage返回的是nwidth和nheight
//在檢測(cè)時(shí)會(huì)有用,由于檢測(cè)時(shí)是不知道要計(jì)算哪塊區(qū)域的,所以需要對(duì)整副圖像需要多少窗口
//訓(xùn)練時(shí)由于樣本大小均為窗口大小,所以不需要額外存儲(chǔ)block信息,則useCache=0,nwindows=1;
//檢測(cè)時(shí)由于待檢測(cè)圖像大于檢測(cè)窗口大小,所以需要額外存儲(chǔ)重復(fù)的block信息,則useCache=1,需要重新計(jì)算nwindows
//detect函數(shù)中的useCache默認(rèn)值為1,即檢測(cè)時(shí)是需要額外存儲(chǔ)block信息的
//compute函數(shù)中的useCache默認(rèn)值為0,detect會(huì)調(diào)用compute,會(huì)改變useCache的值 ?
if( !nwindows )
nwindows = cache.windowsInImage(paddedImgSize, winStride).area();
const HOGCache::BlockData* blockData = &cache.blockData[0];
int nblocks = cache.nblocks.area();
int blockHistogramSize = cache.blockHistogramSize;
size_t dsize = getDescriptorSize();//一個(gè)窗口中特征向量大小:2*2*9*15*7=3780
descriptors.resize(dsize*nwindows);//注意到算法中樣本大小為64*128,但實(shí)際上是有擴(kuò)充的,實(shí)際特征向量還要乘上nwindows
//descriptor存儲(chǔ)分nwindows段,每段又分nblocks=105段,每段又有36個(gè)bin
for( size_t i = 0; i < nwindows; i++ )
{
float* descriptor = &descriptors[i*dsize];
Point pt0;
//locations.empty()為空返回1
//不為空時
if( !locations.empty() )
{
pt0 = locations[i];
if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )
continue;
}
//為空時:
else
{
pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding);
CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
}
for( int j = 0; j < nblocks; j++ )//nblocks=105
{
const HOGCache::BlockData& bj = blockData[j];
//imgOffset = Point(j*blockStride.width,i*blockStride.height),block在window中的位置
//pt0:為img在parent img中的位置,注意到getBlock(pt,dst)中pt就是指的在parent img中的位置
Point pt = pt0 + bj.imgOffset;
//histOfs=(j*nblocks.height + i)*blockHistogramSize,nblocks.height=15
float* dst = descriptor + bj.histOfs;
//dst只是該block的存儲空間,pt表示該block在圖中的位置,src才是計算后的直方圖,將其賦值給dst
const float* src = cache.getBlock(pt, dst);
if( src != dst )
for( int k = 0; k < blockHistogramSize; k++ )//blockHistogramSize=36
dst[k] = src[k];
}
}
}
//hits:檢測(cè)圖像中存在目標(biāo)的區(qū)域的坐標(biāo)
//hitThreshold:為目標(biāo)的閾值
//img:不要求為64*128
//處理固定尺度上目標(biāo)的檢測(cè),detectMultiScale中Scale循環(huán),每個(gè)循環(huán)中調(diào)用detect
void HOGDescriptor::detect(const Mat& img,
vector<Point>& hits, double hitThreshold,
Size winStride, Size padding, const vector<Point>& locations) const
{
hits.clear();
if( svmDetector.empty() )
return;
if( winStride == Size() )
winStride = cellSize;
Size cacheStride(gcd(winStride.width, blockStride.width),
gcd(winStride.height, blockStride.height));
size_t nwindows = locations.size();
padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);
padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2);
HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);
if( !nwindows )
nwindows = cache.windowsInImage(paddedImgSize, winStride).area();
const HOGCache::BlockData* blockData = &cache.blockData[0];
int nblocks = cache.nblocks.area();
int blockHistogramSize = cache.blockHistogramSize;
size_t dsize = getDescriptorSize();
double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0;
vector<float> blockHist(blockHistogramSize);
for( size_t i = 0; i < nwindows; i++ )
{
Point pt0;
if( !locations.empty() )
{
pt0 = locations[i];
if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )
continue;
}
else
{
pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding);
CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
}
double s = rho;
const float* svmVec = &svmDetector[0];
int j, k;
for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
{
const HOGCache::BlockData& bj = blockData[j];
Point pt = pt0 + bj.imgOffset;
const float* vec = cache.getBlock(pt, &blockHist[0]);
//分兩步,考慮到檢測(cè)算子中的偏置
for( k = 0; k <= blockHistogramSize - 4; k += 4 )
s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +
vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];
for( ; k < blockHistogramSize; k++ )
s += vec[k]*svmVec[k];
}
if( s >= hitThreshold )
hits.push_back(pt0);
}
}
// Per-thread working state for HOGDescriptor::detectMultiScale; one instance
// is created for each thread index so pyramid levels can be processed without
// sharing buffers.
struct HOGThreadData
{
// Detections found by this thread, already scaled back to the coordinates of
// the original image.
vector<Rect> rectangles;
// Window origins returned by the most recent detect() call on this thread.
vector<Point> locations;
// Backing storage for the resized image of the level being processed;
// allocated at the full size of the input image.
Mat smallerImgBuf;
};
// Multi-scale detection: builds a list of scale factors, runs detect() on a
// resized copy of `img` for each of them, and converts every hit back into a
// rectangle in the coordinates of the original image.
//
// img            - image to search
// foundLocations - cleared on entry; receives the detected rectangles after
//                  they have been merged by groupRectangles
// hitThreshold   - forwarded unchanged to detect()
// winStride      - forwarded unchanged to detect()
// padding        - forwarded unchanged to detect()
// scale0         - factor applied to the scale between consecutive levels;
//                  a value <= 1 results in a single level at scale 1
// groupThreshold - forwarded to groupRectangles
void HOGDescriptor::detectMultiScale(
    const Mat& img, vector<Rect>& foundLocations,
    double hitThreshold, Size winStride, Size padding,
    double scale0, int groupThreshold) const
{
    double scale = 1.;
    foundLocations.clear();
    int i, levels = 0;
    const int maxLevels = 64;

    // One scratch record per thread so the levels can be processed in parallel.
    int t, nthreads = getNumThreads();
    vector<HOGThreadData> threadData(nthreads);
    for( t = 0; t < nthreads; t++ )
        threadData[t].smallerImgBuf.create(img.size(), img.type());

    // Collect the scale of every level. The list stops as soon as the scaled
    // image would be smaller than the detection window, so only objects at
    // least as large as the window can be found.
    vector<double> levelScale(maxLevels);
    for( levels = 0; levels < maxLevels; levels++ )
    {
        levelScale[levels] = scale;
        if( cvRound(img.cols/scale) < winSize.width ||
            cvRound(img.rows/scale) < winSize.height ||
            scale0 <= 1 )
            break;
        scale *= scale0;
    }
    // Always process at least one level.
    levels = std::max(levels, 1);
    levelScale.resize(levels);

#ifdef _OPENMP
    #pragma omp parallel for num_threads(nthreads) schedule(dynamic)
#endif // _OPENMP
    for( i = 0; i < levels; i++ )
    {
        // Scratch data belonging to the thread executing this iteration.
        HOGThreadData& tdata = threadData[getThreadNum()];
        const double curScale = levelScale[i];
        Size sz(cvRound(img.cols/curScale), cvRound(img.rows/curScale));

        // The resized image lives in the thread's preallocated buffer; when no
        // resizing is needed the header simply points at the input pixels.
        Mat smallerImg(sz, img.type(), tdata.smallerImgBuf.data);
        if( sz == img.size() )
            smallerImg = Mat(sz, img.type(), img.data, img.step);
        else
            resize(img, smallerImg, sz);

        detect(smallerImg, tdata.locations, hitThreshold, winStride, padding);

        // Map window origins and the window size back to the original image.
        Size scaledWinSize = Size(cvRound(winSize.width*curScale), cvRound(winSize.height*curScale));
        for( size_t j = 0; j < tdata.locations.size(); j++ )
            tdata.rectangles.push_back(Rect(
                cvRound(tdata.locations[j].x*curScale),
                cvRound(tdata.locations[j].y*curScale),
                scaledWinSize.width, scaledWinSize.height));
    }

    // Gather the rectangles of all threads into the output vector.
    for( t = 0; t < nthreads; t++ )
    {
        HOGThreadData& tdata = threadData[t];
        std::copy(tdata.rectangles.begin(), tdata.rectangles.end(),
            std::back_inserter(foundLocations));
    }

    // Merge the collected rectangles.
    groupRectangles(foundLocations, groupThreshold, 0.2);
}
// Returns a copy of the built-in detector coefficient table as a vector.
vector<float> HOGDescriptor::getDefaultPeopleDetector()
{
    // NOTE(review): the table below holds only two zero entries - presumably a
    // placeholder for the full set of trained coefficients; confirm before use.
    static const float detector[] = {0,0};
    const size_t coefficientCount = sizeof(detector)/sizeof(detector[0]);
    const float* const first = detector;
    const float* const last = first + coefficientCount;
    return vector<float>(first, last);
}
}
以上為HOG代碼的注釋與理解~不清楚的歡迎提問~有不對的地方,歡迎指出~
總結
以上是生活随笔為你收集整理的Hog源码注释(hog.cpp的注解)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 【Vue 4 笔记 】(一)
- 下一篇: DCI模型架构