AVOD-代码理解系列(三)
生活随笔
收集整理的這篇文章主要介紹了
AVOD-代码理解系列(三)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
AVOD-代碼理解
從輸入到數據融合之前
def build(self):
    # NOTE(review): this is an excerpt from RpnModel.build() as quoted by the
    # article; the method continues after the ROI pooling section below.

    # Setup input placeholders — reserves placeholders for all network inputs.
    self._set_up_input_pls()

    # Setup feature extractors — builds the BEV and image feature maps.
    self._set_up_feature_extractors()

    # Proposal inputs: the bottleneck feature maps for both branches.
    # (Per the article these are the 1x1-conv outputs — confirm in
    # _set_up_feature_extractors.)
    bev_proposal_input = self.bev_bottleneck
    img_proposal_input = self.img_bottleneck

    # Divisor used when fusing the two branches by mean.
    fusion_mean_div_factor = 2.0

    # If both img and bev probabilities are set to 1.0, don't do path drop.
    # (Per the article: train = 0.9, test/val = 1.0 — TODO confirm in config.)
    if not (self._path_drop_probabilities[0] ==
            self._path_drop_probabilities[1] == 1.0):
        with tf.variable_scope('rpn_path_drop'):

            # Three uniform samples in [0, 1): two "coin flips" (one per
            # branch) plus a tie-breaker used when both branches are dropped.
            random_values = tf.random_uniform(shape=[3],
                                              minval=0.0,
                                              maxval=1.0)

            # Each mask is a scalar 0.0 or 1.0 tensor.
            img_mask, bev_mask = self.create_path_drop_masks(
                self._path_drop_probabilities[0],
                self._path_drop_probabilities[1],
                random_values)

            # Zero out a branch's features when its mask is 0 (dropped);
            # pass them through unchanged when the mask is 1.
            img_proposal_input = tf.multiply(img_proposal_input,
                                             img_mask)
            bev_proposal_input = tf.multiply(bev_proposal_input,
                                             bev_mask)

            self.img_path_drop_mask = img_mask
            self.bev_path_drop_mask = bev_mask

            # Overwrite the division factor: with path drop active, divide
            # by the number of branches actually kept (1 or 2).
            fusion_mean_div_factor = img_mask + bev_mask

    # Crop the BEV and image feature maps to the proposal regions.
    with tf.variable_scope('proposal_roi_pooling'):

        with tf.variable_scope('box_indices'):
            def get_box_indices(boxes):
                # Returns, for every box, the index of the batch image it
                # belongs to (flattened), as required by crop_and_resize.
                proposals_shape = boxes.get_shape().as_list()
                # Fall back to the dynamic shape when any dim is unknown.
                if any(dim is None for dim in proposals_shape):
                    proposals_shape = tf.shape(boxes)
                ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                multiplier = tf.expand_dims(
                    tf.range(start=0, limit=proposals_shape[0]), 1)
                return tf.reshape(ones_mat * multiplier, [-1])

            # Add a leading batch axis so the anchors look like one batch.
            bev_boxes_norm_batches = tf.expand_dims(
                self._bev_anchors_norm_pl, axis=0)

            # These should be all 0's since there is only 1 image.
            tf_box_indices = get_box_indices(bev_boxes_norm_batches)

        # Do ROI Pooling on BEV.
        # crop_and_resize returns [num_boxes, crop_height, crop_width, depth];
        # resizing both branches to the same crop size (per the article, 6x6 —
        # TODO confirm _proposal_roi_crop_size) makes the later fusion possible.
        bev_proposal_rois = tf.image.crop_and_resize(
            bev_proposal_input,
            self._bev_anchors_norm_pl,
            tf_box_indices,
            self._proposal_roi_crop_size)
        # Do ROI Pooling on image.
        img_proposal_rois = tf.image.crop_and_resize(
            img_proposal_input,
            self._img_anchors_norm_pl,
            tf_box_indices,
            self._proposal_roi_crop_size)

# [article] create_path_drop_masks: this helper's job is to decide the
# bev_mask and img_mask values (the author notes being unsure of its purpose).
def create_path_drop_masks(self,p_img,p_bev,random_values):"""Determines global path drop decision based on given probabilities.Args:p_img: A tensor of float32, probability of keeping image branch保持圖像分支的概率p_bev: A tensor of float32, probability of keeping bev branchrandom_values: A tensor of float32 of shape [3], the resultsof coin flips, values should range from 0.0 - 1.0.Returns:final_img_mask: A constant tensor mask containing either one or zerodepending on the final coin flip probability.final_bev_mask: A constant tensor mask containing either one or zerodepending on the final coin flip probability."""#keep=1,kill=0def keep_branch(): return tf.constant(1.0)def kill_branch(): return tf.constant(0.0)# The logic works as follows:# We have flipped 3 coins, first determines the chance of keeping# the image branch, second determines keeping bev branch, the third# makes the final decision in the case where both branches were killed# off, otherwise the initial img and bev chances are kept.#首先確定保持圖像分支的機會,第二個確定保持bev分支,# 第三個決定在兩個分支被殺掉的情況下做出最終決定,否則保留初始的img和bev機會。#tf.less()->bool/將x的數據格式轉化成dtype.#random_value[0]<0.9,keepimg_chances = tf.case([(tf.less(random_values[0], p_img),keep_branch)], default=kill_branch)#random_value[1]<0.9,keep,默認為kill!bev_chances = tf.case([(tf.less(random_values[1], p_bev),keep_branch)], default=kill_branch)# Decision to determine whether both branches were killed off#兩者是否有一個為1,則返回1.不應該是and嗎?third_flip = tf.logical_or(tf.cast(img_chances, dtype=tf.bool),tf.cast(bev_chances, dtype=tf.bool))#兩者有一個為1,則third_flip=1.0third_flip = tf.cast(third_flip, dtype=tf.float32)# Make a second choice, for the third case# Here we use a 50/50 chance to keep either image or bev# If its greater than 0.5, keep the image#random_value[2]>0.5,keepimg_second_flip = tf.case([(tf.greater(random_values[2], 0.5),keep_branch)],default=kill_branch)# If its less than or equal to 0.5, keep bev#random_value[2]<=0.5,keep/兩者相沖bev_second_flip = 
tf.case([(tf.less_equal(random_values[2], 0.5),keep_branch)],default=kill_branch)# Use lambda since this returns another condition and it needs to# be callable#如果third=1,則使用img_chances作為最終結果,即random_value[0]<0.9,keep#否則,以第二種方法作為最終結果/第二種方法里面只會有一個為truefinal_img_mask = tf.case([(tf.equal(third_flip, 1),lambda: img_chances)],default=lambda: img_second_flip)#同上final_bev_mask = tf.case([(tf.equal(third_flip, 1),lambda: bev_chances)],default=lambda: bev_second_flip)return final_img_mask, final_bev_mask2 數據融合到rpn,也就是利用卷積網絡對其進行分類(二分類:背景和物體)和回歸
#bev和image的融合with tf.variable_scope('proposal_roi_fusion'):rpn_fusion_out = None#meanif self._fusion_method == 'mean':tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)#/2平均融合方式rpn_fusion_out = tf.divide(tf_features_sum,fusion_mean_div_factor)elif self._fusion_method == 'concat':rpn_fusion_out = tf.concat([bev_proposal_rois, img_proposal_rois], axis=3)else:raise ValueError('Invalid fusion method', self._fusion_method)# TODO: move this section into an separate AnchorPredictor classwith tf.variable_scope('anchor_predictor', 'ap', [rpn_fusion_out]):#融合后的作為輸入tensor_in = rpn_fusion_out# Parse rpn layers configlayers_config = self._config.layers_config.rpn_config#0.0005l2_weight_decay = layers_config.l2_weight_decayif l2_weight_decay > 0:#正則化weights_regularizer = slim.l2_regularizer(l2_weight_decay)else:weights_regularizer = Nonewith slim.arg_scope([slim.conv2d],weights_regularizer=weights_regularizer):# Use conv2d instead of fully_connected layers.#256,6上一層的輸出實際上就是6*6的所以將全連接化為卷積操作,使用6*6的卷積核cls_fc6 = slim.conv2d(tensor_in,layers_config.cls_fc6,self._proposal_roi_crop_size,padding='VALID',scope='cls_fc6')#0.5cls_fc6_drop = slim.dropout(cls_fc6,layers_config.keep_prob,is_training=self._is_training,scope='cls_fc6_drop')#256cls_fc7 = slim.conv2d(cls_fc6_drop,layers_config.cls_fc7,[1, 1],scope='cls_fc7')cls_fc7_drop = slim.dropout(cls_fc7,layers_config.keep_prob,is_training=self._is_training,scope='cls_fc7_drop')#2,分類cls_fc8 = slim.conv2d(cls_fc7_drop,2,[1, 1],activation_fn=None,scope='cls_fc8')#刪除指定尺寸為1 的objectness = tf.squeeze(cls_fc8, [1, 2],name='cls_fc8/squeezed')# Use conv2d instead of fully_connected layers.#256,6reg_fc6 = slim.conv2d(tensor_in,layers_config.reg_fc6,self._proposal_roi_crop_size,padding='VALID',scope='reg_fc6')#dropout0.5reg_fc6_drop = slim.dropout(reg_fc6,layers_config.keep_prob,is_training=self._is_training,scope='reg_fc6_drop')#256reg_fc7 = slim.conv2d(reg_fc6_drop,layers_config.reg_fc7,[1, 1],scope='reg_fc7')reg_fc7_drop = 
slim.dropout(reg_fc7,layers_config.keep_prob,is_training=self._is_training,scope='reg_fc7_drop')#?t x , ?t y , ?t z , ?d x , ?d y , ?d z#256,6個回歸值包括中心點差值,以及長寬高的差值reg_fc8 = slim.conv2d(reg_fc7_drop,6,[1, 1],activation_fn=None,scope='reg_fc8')#?offsets = tf.squeeze(reg_fc8, [1, 2],name='reg_fc8/squeezed')總結
以上是生活随笔為你收集整理的AVOD-代码理解系列(三)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 【论文解读】MV3D-Net、AVOD-
- 下一篇: jrtplib linux编译,linu