faster rcnn源码理解(二)之AnchorTargetLayer(网络中的rpn_data)
生活随笔
收集整理的這篇文章主要介紹了
faster rcnn源码理解(二)之AnchorTargetLayer(网络中的rpn_data)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
轉載自:faster rcnn源碼理解(二)之AnchorTargetLayer(網絡中的rpn_data) - 野孩子的專欄 - 博客頻道 - CSDN.NET
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------

import os
import caffe
import yaml
from fast_rcnn.config import cfg
import numpy as np
import numpy.random as npr
from generate_anchors import generate_anchors
from utils.cython_bbox import bbox_overlaps
from fast_rcnn.bbox_transform import bbox_transform

DEBUG = False


class AnchorTargetLayer(caffe.Layer):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.
    """

    def setup(self, bottom, top):
        """Read the layer parameters, build the base anchors, shape the tops.

        Layer parameters (parsed from ``self.param_str_``):
          * ``scales``         -- anchor scales, default ``(8, 16, 32)``
          * ``feat_stride``    -- required; pixel stride between map cells
          * ``allowed_border`` -- default 0; how far an anchor may cross the
                                  image border and still be kept

        ``bottom[0]`` only contributes its trailing (height, width) here.
        The original notes describe the bottoms as: [0] score map,
        [1] gt boxes with labels, [2] im_info, [3] image data.
        """
        # safe_load: yaml.load() without an explicit Loader is rejected by
        # PyYAML >= 6 and may construct arbitrary Python objects.
        layer_params = yaml.safe_load(self.param_str_)
        anchor_scales = layer_params.get('scales', (8, 16, 32))
        # Base anchors, one row per anchor. The columns are read below as
        # (x1, y1, x2, y2): width = col 2 - col 0, height = col 3 - col 1.
        self._anchors = generate_anchors(scales=np.array(anchor_scales))
        # Number of base anchors per map cell (A).
        self._num_anchors = self._anchors.shape[0]
        # Stride between neighbouring cells (16 per the original notes).
        self._feat_stride = layer_params['feat_stride']

        if DEBUG:
            print('anchors:')
            print(self._anchors)
            print('anchor shapes:')
            print(np.hstack((
                self._anchors[:, 2::4] - self._anchors[:, 0::4],
                self._anchors[:, 3::4] - self._anchors[:, 1::4],
            )))
            # Running statistics of the regression targets (debug only).
            self._counts = cfg.EPS
            self._sums = np.zeros((1, 4))
            self._squared_sums = np.zeros((1, 4))
            self._fg_sum = 0
            self._bg_sum = 0
            self._count = 0

        # allow boxes to sit over the edge by a small amount
        self._allowed_border = layer_params.get('allowed_border', 0)

        height, width = bottom[0].data.shape[-2:]
        if DEBUG:
            print('AnchorTargetLayer: height {} width {}'.format(
                height, width))

        A = self._num_anchors
        # labels
        top[0].reshape(1, 1, A * height, width)
        # bbox_targets
        top[1].reshape(1, A * 4, height, width)
        # bbox_inside_weights
        top[2].reshape(1, A * 4, height, width)
        # bbox_outside_weights
        top[3].reshape(1, A * 4, height, width)

    def forward(self, bottom, top):
        """Compute RPN training targets for a single image.

        Reads:
          * ``bottom[0]`` -- map whose trailing dims are (H, W); batch must be 1
          * ``bottom[1]`` -- ground-truth boxes, rows of (x1, y1, x2, y2, label)
          * ``bottom[2]`` -- im_info; row 0 is read as (height, width, scale)

        Writes:
          * ``top[0]`` -- labels, shape (1, 1, A * H, W); 1 = fg, 0 = bg,
                          -1 = ignored
          * ``top[1]`` -- bbox regression targets, shape (1, A * 4, H, W)
          * ``top[2]`` -- bbox inside weights, shape (1, A * 4, H, W)
          * ``top[3]`` -- bbox outside weights, shape (1, A * 4, H, W)
        """
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes shifted to cell i
        # filter out-of-image anchors
        # measure GT overlap and label anchors fg / bg / ignore
        # subsample fg and bg to the configured batch size
        # compute regression targets and loss weights
        # scatter everything back onto the full (H, W, A) anchor grid

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        # map of shape (..., H, W)
        height, width = bottom[0].data.shape[-2:]
        # GT boxes (x1, y1, x2, y2, label); the number of rows varies
        gt_boxes = bottom[1].data
        # im_info
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print('')
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))
            print('height, width: ({}, {})'.format(height, width))
            print('rpn: gt_boxes.shape {}'.format(gt_boxes.shape))
            print('rpn: gt_boxes {}'.format(gt_boxes))

        # 1. Generate the shifted anchors for every map cell
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]  # K = width * height
        all_anchors = (self._anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        # Every anchor, including those that cross the image border.
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self._allowed_border) &
            (all_anchors[:, 1] >= -self._allowed_border) &
            (all_anchors[:, 2] < im_info[1] + self._allowed_border) &  # width
            (all_anchors[:, 3] < im_info[0] + self._allowed_border)    # height
        )[0]  # indices of the anchors that stay inside the image

        if DEBUG:
            print('total_anchors {}'.format(total_anchors))
            print('inds_inside {}'.format(len(inds_inside)))

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print('anchors.shape {}'.format(anchors.shape))

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 is the dtype the removed alias np.float used to name.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))
        # Per anchor (row): index of, and overlap with, its best gt box.
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        # Per gt box (column): the best overlap any anchor achieves ...
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # ... and every anchor that reaches it (ties included).
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # subsample positive labels if we have too many
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(
                bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1

        # Regression target of each anchor towards its best-overlapping gt box.
        bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

        # Inside weights: non-zero only on the rows of positive anchors.
        bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array(
            cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

        bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting of examples (given non-uniform sampling)
            num_examples = np.sum(labels >= 0)
            positive_weights = np.ones((1, 4)) * 1.0 / num_examples
            negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
            positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                                np.sum(labels == 1))
            negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                                np.sum(labels == 0))
        bbox_outside_weights[labels == 1, :] = positive_weights
        bbox_outside_weights[labels == 0, :] = negative_weights

        if DEBUG:
            self._sums += bbox_targets[labels == 1, :].sum(axis=0)
            self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
            self._counts += np.sum(labels == 1)
            means = self._sums / self._counts
            stds = np.sqrt(self._squared_sums / self._counts - means ** 2)
            print('means:')
            print(means)
            print('stdevs:')
            print(stds)

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(
            bbox_inside_weights, total_anchors, inds_inside, fill=0)
        bbox_outside_weights = _unmap(
            bbox_outside_weights, total_anchors, inds_inside, fill=0)

        if DEBUG:
            print('rpn: max max_overlap {}'.format(np.max(max_overlaps)))
            print('rpn: num_positive {}'.format(np.sum(labels == 1)))
            print('rpn: num_negative {}'.format(np.sum(labels == 0)))
            self._fg_sum += np.sum(labels == 1)
            self._bg_sum += np.sum(labels == 0)
            self._count += 1
            print('rpn: num_positive avg {}'.format(
                self._fg_sum / self._count))
            print('rpn: num_negative avg {}'.format(
                self._bg_sum / self._count))

        # labels: (K*A,) -> (1, H, W, A) -> (1, A, H, W) -> (1, 1, A*H, W)
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, 1, A * height, width))
        top[0].reshape(*labels.shape)
        top[0].data[...] = labels

        # bbox_targets: (K*A, 4) -> (1, H, W, A*4) -> (1, A*4, H, W)
        bbox_targets = bbox_targets \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        top[1].reshape(*bbox_targets.shape)
        top[1].data[...] = bbox_targets

        # bbox_inside_weights
        bbox_inside_weights = bbox_inside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_inside_weights.shape[2] == height
        assert bbox_inside_weights.shape[3] == width
        top[2].reshape(*bbox_inside_weights.shape)
        top[2].data[...] = bbox_inside_weights

        # bbox_outside_weights
        bbox_outside_weights = bbox_outside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_outside_weights.shape[2] == height
        assert bbox_outside_weights.shape[3] == width
        top[3].reshape(*bbox_outside_weights.shape)
        top[3].data[...] = bbox_outside_weights

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass


def _unmap(data, count, inds, fill=0):
    """Unmap a subset of items (data) back to the original set of items.

    Returns a float32 array with ``count`` rows and the trailing shape of
    ``data``. Rows listed in ``inds`` hold the corresponding rows of ``data``;
    every other row holds ``fill``.
    """
    # data.shape[1:] is () for 1-D input, so one code path covers both the
    # vector case (labels) and the matrix case (targets / weights).
    ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
    ret.fill(fill)
    ret[inds] = data
    return ret


def _compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    ``ex_rois`` must have 4 columns and ``gt_rois`` 5 columns, with one
    ``gt_rois`` row per ``ex_rois`` row. Only the first four columns of
    ``gt_rois`` are passed on to ``bbox_transform``; the fifth is dropped
    (the caller documents its rows as (x1, y1, x2, y2, label)).

    Returns the ``bbox_transform`` result as float32.
    """

    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    # copy=False avoids a second allocation when the result is already float32
    return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
總結筆記:
rpn-data是AnchorTargetLayer
bottom 長度為4:bottom[0] 為 map(只取其高、寬);bottom[1] 為 gt_boxes(boxes 與 labels);bottom[2] 為 im_info;bottom[3] 為圖片數據
self._feat_stride:網絡中參數16
self._anchors:九個anchor,每行為(x1, y1, x2, y2)(代碼中以第2列減第0列得到寬、第3列減第1列得到高);它們由原始框的w、h、x_ctr、y_ctr做橫向縱向變化,并放大縮小得到
self._num_anchors:anchor的個數
inds_inside:沒有過界的anchors索引
anchors:沒有過界的anchors
argmax_overlaps:overlaps每行最大值索引
total_anchors: K*A,所有anchors個數,包括越界的
K: width*height
A: 9
gt_boxes:長度不定
bbox_overlaps: 返回:
overlaps: (len(inds_inside)* len(gt_boxes))
論文筆記:我們分配正標簽給兩類anchor:(i)與某個ground truth(GT)包圍盒有最高的IoU(Intersection-over-Union,交集并集之比)重疊的anchor(也許不到0.7),(ii)與任意GT包圍盒有大于0.7的IoU交疊的anchor。
labels:0,bg; 1,fg; -1,don't care;形狀為(len(inds_inside),);overlaps每列最大值對應的行=1; overlaps行最大值 >= 0.7(cfg.TRAIN.RPN_POSITIVE_OVERLAP)的行=1; overlaps行最大值 < 0.3(cfg.TRAIN.RPN_NEGATIVE_OVERLAP)的行=0
正樣本數量上限由它們控制:cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE(128),即正樣本數小于等于該值
負樣本數量上限:cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1),即負樣本數小于等于該值
cfg.TRAIN.RPN_BATCHSIZE: 256,控制最終參與訓練(label不為-1)的anchor數量上限
超出上限的anchor會被隨機(npr.choice,不放回)選出,并把label置為-1
bbox_inside_weights: label等于1的行,它的值等于cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS(1.0);其他等于0;(len(inds_inside), 4);相當于損失函數中的 $p_i^*$
cfg.TRAIN.RPN_POSITIVE_WEIGHT: -1.0
bbox_outside_weights:RPN_POSITIVE_WEIGHT < 0 時,fg與bg的行都等于np.ones((1, 4)) * 1.0 / (fg數 + bg數),其他為0;(len(inds_inside), 4)
_unmap: 建立一個行數為total_anchors、其余維度與第一個參數(data)相同的數組;全用fill填充;再把inds_inside對應的行用data的對應行填充
http://blog.csdn.net/u010668907/article/details/51942481
faster用python版本的https://github.com/rbgirshick/py-faster-rcnn
AnchorTargetLayer源碼在https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/anchor_target_layer.py
源碼粘貼:
[python] view plain copy print
rpn-data是AnchorTargetLayer
bottom 長度為4:bottom[0] 為 map(只取其高、寬);bottom[1] 為 gt_boxes(boxes 與 labels);bottom[2] 為 im_info;bottom[3] 為圖片數據
self._feat_stride:網絡中參數16
self._anchors:九個anchor,每行為(x1, y1, x2, y2)(代碼中以第2列減第0列得到寬、第3列減第1列得到高);它們由原始框的w、h、x_ctr、y_ctr做橫向縱向變化,并放大縮小得到
self._num_anchors:anchor的個數
inds_inside:沒有過界的anchors索引
anchors:沒有過界的anchors
argmax_overlaps:overlaps每行最大值索引
total_anchors: K*A,所有anchors個數,包括越界的
K: width*height
A: 9
gt_boxes:長度不定
bbox_overlaps: 返回:
overlaps: (len(inds_inside)* len(gt_boxes))
論文筆記:我們分配正標簽給兩類anchor:(i)與某個ground truth(GT)包圍盒有最高的IoU(Intersection-over-Union,交集并集之比)重疊的anchor(也許不到0.7),(ii)與任意GT包圍盒有大于0.7的IoU交疊的anchor。
labels:0,bg; 1,fg; -1,don't care;形狀為(len(inds_inside),);overlaps每列最大值對應的行=1; overlaps行最大值 >= 0.7(cfg.TRAIN.RPN_POSITIVE_OVERLAP)的行=1; overlaps行最大值 < 0.3(cfg.TRAIN.RPN_NEGATIVE_OVERLAP)的行=0
正樣本數量上限由它們控制:cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE(128),即正樣本數小于等于該值
負樣本數量上限:cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1),即負樣本數小于等于該值
cfg.TRAIN.RPN_BATCHSIZE: 256,控制最終參與訓練(label不為-1)的anchor數量上限
超出上限的anchor會被隨機(npr.choice,不放回)選出,并把label置為-1
bbox_inside_weights: label等于1的行,它的值等于cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS(1.0);其他等于0;(len(inds_inside), 4);相當于損失函數中的 $p_i^*$
cfg.TRAIN.RPN_POSITIVE_WEIGHT: -1.0
bbox_outside_weights:RPN_POSITIVE_WEIGHT < 0 時,fg與bg的行都等于np.ones((1, 4)) * 1.0 / (fg數 + bg數),其他為0;(len(inds_inside), 4)
_unmap: 建立一個行數為total_anchors、其余維度與第一個參數(data)相同的數組;全用fill填充;再把inds_inside對應的行用data的對應行填充
總結
以上是生活随笔為你收集整理的faster rcnn源码理解(二)之AnchorTargetLayer(网络中的rpn_data)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: faster rcnn的源码理解(一)S
- 下一篇: faster rcnn源码解读(三)tr