AVOD Code Understanding Series (Part 2)
This is the second post in the AVOD code-understanding series. It begins to cover the two major parts of the network, the RPN stage and the AVOD stage; avod_model.py and rpn_model.py under the core folder form the bodies of the two structures, with some of the related processing code interleaved along the way. This post first looks at the network inputs.
1. At the start of build() in avod_model.py, the outputs of rpn_model are needed as the inputs to the AVOD stage:
```python
def build(self):
    rpn_model = self._rpn_model

    # Share the same prediction dict as RPN
    # First stage: rpn_model.build()
    prediction_dict = rpn_model.build()

    # Anchors kept after NMS
    top_anchors = prediction_dict[RpnModel.PRED_TOP_ANCHORS]
    ground_plane = rpn_model.placeholders[RpnModel.PL_GROUND_PLANE]

    # 1: car
    class_labels = rpn_model.placeholders[RpnModel.PL_LABEL_CLASSES]
```
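Here `top_anchors` are the proposals kept after non-maximum suppression. In AVOD an anchor is encoded as a 6-vector (x, y, z, dim_x, dim_y, dim_z), the centroid plus the box dimensions, which matches the `[None, 6]` placeholder shapes that show up later in `_set_up_input_pls`. A toy example with purely illustrative values:

```python
import numpy as np

# One hypothetical car-sized anchor in camera coordinates
# (all values are illustrative, not taken from the code).
anchor = np.array([10.0, 1.7, 25.0, 3.9, 1.6, 1.5])
x, y, z, dim_x, dim_y, dim_z = anchor
```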
2. The rpn_model.py part

```python
def build(self):
    # Setup input placeholders: reserve a slot for every network input
    self._set_up_input_pls()

    # Setup feature extractors: feature maps of the input images
    self._set_up_feature_extractors()

    # Proposal inputs: the BEV and image feature maps
    # after the 1x1 convolutions
    bev_proposal_input = self.bev_bottleneck
    img_proposal_input = self.img_bottleneck

    # Fusion parameter
    fusion_mean_div_factor = 2.0
```
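The excerpt stops at `fusion_mean_div_factor = 2.0`. Further down in rpn_model.py (not shown here) this constant divides the element-wise sum of the two cropped proposal feature maps when the 'mean' fusion method is selected. A minimal sketch of that pattern, with hypothetical tensor shapes:

```python
import tensorflow as tf

# Hypothetical ROI-cropped proposal features from the two views,
# e.g. [num_proposals, 3, 3, 1] after crop_and_resize on the bottlenecks.
bev_proposal_rois = tf.placeholder(tf.float32, [None, 3, 3, 1])
img_proposal_rois = tf.placeholder(tf.float32, [None, 3, 3, 1])

fusion_mean_div_factor = 2.0

# 'mean' fusion: element-wise average of the BEV and image features
tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)
rpn_fusion_out = tf.divide(tf_features_sum, fusion_mean_div_factor)
```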
`_set_up_feature_extractors` convolves the input RGB image and the BEV image to extract their features:

```python
def _set_up_feature_extractors(self):
    """Sets up feature extractors and stores feature maps and
    bottlenecks as member variables."""
    # Feature map of the input bird's-eye view (256 channels)
    self.bev_feature_maps, self.bev_end_points = \
        self._bev_feature_extractor.build(
            self._bev_preprocessed,
            self._bev_pixel_size,
            self._is_training)

    # Feature map of the input image; same operation as for the BEV
    self.img_feature_maps, self.img_end_points = \
        self._img_feature_extractor.build(
            self._img_preprocessed,
            self._img_pixel_size,
            self._is_training)

    # 1x1 convolution on the BEV feature map
    with tf.variable_scope('bev_bottleneck'):
        self.bev_bottleneck = slim.conv2d(
            self.bev_feature_maps,
            1, [1, 1],
            scope='bottleneck',
            normalizer_fn=slim.batch_norm,
            normalizer_params={'is_training': self._is_training})

    # img bottleneck: same operation as for the BEV
    with tf.variable_scope('img_bottleneck'):
        self.img_bottleneck = slim.conv2d(
            self.img_feature_maps,
            1, [1, 1],
            scope='bottleneck',
            normalizer_fn=slim.batch_norm,
            normalizer_params={'is_training': self._is_training})
```
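The 1x1 'bottleneck' convolutions reduce the 256-channel feature maps to a single channel before the proposal crops; per the AVOD paper this is done for memory efficiency, since a large number of anchor crops are taken from these maps. A quick shape check under assumed sizes:

```python
import tensorflow as tf
slim = tf.contrib.slim

# Hypothetical 256-channel BEV feature map (batch of 1, 350 x 400)
feature_maps = tf.placeholder(tf.float32, [1, 350, 400, 256])
bottleneck = slim.conv2d(feature_maps, 1, [1, 1], scope='bottleneck')
print(bottleneck.shape)  # (1, 350, 400, 1)
```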
`self._bev_feature_extractor` is the feature-extraction network for the BEV input; `self._img_feature_extractor` performs the same operations on the RGB image. Its build method is a modified VGG:

```python
def build(self,
          inputs,
          input_pixel_size,
          is_training,
          scope='bev_vgg'):
    """Modified VGG for BEV feature extraction

    Note: All the fully_connected layers have been transformed to conv2d
    layers and are implemented in the main model.

    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        input_pixel_size: size of the input (H x W)
        is_training: True for training, False for validation/testing.
        scope: Optional scope for the variables.

    Returns:
        The last op containing the log predictions and end_points dict.
    """
    # avod_car_example.config / rpn_config
    vgg_config = self.config

    # inputs is the BEV input
    with slim.arg_scope(self.vgg_arg_scope(
            weight_decay=vgg_config.l2_weight_decay)):
        with tf.variable_scope(scope, 'bev_vgg', [inputs]) as sc:
            end_points_collection = sc.name + '_end_points'

            # Collect outputs for conv2d, fully_connected and max_pool2d.
            with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                                outputs_collections=end_points_collection):
                # [2, 32]: repeat two convolutions, output channels = 32
                net = slim.repeat(inputs,
                                  vgg_config.vgg_conv1[0],
                                  slim.conv2d,
                                  vgg_config.vgg_conv1[1],
                                  [3, 3],
                                  normalizer_fn=slim.batch_norm,
                                  normalizer_params={
                                      'is_training': is_training},
                                  scope='conv1')
                # max_pool2d has no trainable parameters
                net = slim.max_pool2d(net, [2, 2], scope='pool1')

                # [2, 64]: repeat two convolutions, output channels = 64
                net = slim.repeat(net,
                                  vgg_config.vgg_conv2[0],
                                  slim.conv2d,
                                  vgg_config.vgg_conv2[1],
                                  [3, 3],
                                  normalizer_fn=slim.batch_norm,
                                  normalizer_params={
                                      'is_training': is_training},
                                  scope='conv2')
                net = slim.max_pool2d(net, [2, 2], scope='pool2')

                # [3, 128]
                net = slim.repeat(net,
                                  vgg_config.vgg_conv3[0],
                                  slim.conv2d,
                                  vgg_config.vgg_conv3[1],
                                  [3, 3],
                                  normalizer_fn=slim.batch_norm,
                                  normalizer_params={
                                      'is_training': is_training},
                                  scope='conv3')
                net = slim.max_pool2d(net, [2, 2], scope='pool3')

                # [3, 256]
                net = slim.repeat(net,
                                  vgg_config.vgg_conv4[0],
                                  slim.conv2d,
                                  vgg_config.vgg_conv4[1],
                                  [3, 3],
                                  normalizer_fn=slim.batch_norm,
                                  normalizer_params={
                                      'is_training': is_training},
                                  scope='conv4')

            with tf.variable_scope('upsampling'):
                # This extractor downsamples the input by a factor
                # of 8 (3 maxpool layers)
                downsampling_factor = 8
                downsampled_shape = input_pixel_size / downsampling_factor

                # x4, i.e. half of the original input size
                upsampled_shape = \
                    downsampled_shape * vgg_config.upsampling_multiplier

                # Bilinear interpolation; note the target size here is half
                # the original image size, which seems to differ slightly
                # from the paper
                feature_maps_out = tf.image.resize_bilinear(
                    net, upsampled_shape)

            # Convert end_points_collection into an end_points dict.
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)

        return feature_maps_out, end_points
```
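As the comment above notes, the upsampled output is half the input resolution rather than the full resolution suggested by the paper. The shape arithmetic is easy to check (the input size is illustrative; the multiplier of 4 is the value noted in the comments above):

```python
import numpy as np

input_pixel_size = np.array([700, 800])  # hypothetical BEV input, H x W
downsampling_factor = 8                  # three 2x2 max-pool layers
upsampling_multiplier = 4                # vgg_config.upsampling_multiplier

downsampled_shape = input_pixel_size / downsampling_factor
upsampled_shape = downsampled_shape * upsampling_multiplier
print(upsampled_shape)  # [350. 400.] -> half the input size, not the original
```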
`_set_up_input_pls` sets up the inputs. Some details in this part are still not entirely clear to me, but they do not affect the overall understanding much:

```python
def _set_up_input_pls(self):
    """Sets up input placeholders by adding them to self._placeholders.
    Keys are defined as self.PL_*.
    """
    # Combine config data: input size plus depth (the BEV depth is 6);
    # some config files do not set _bev_pixel_size
    bev_dims = np.append(self._bev_pixel_size, self._bev_depth)

    # BEV input
    with tf.variable_scope('bev_input'):
        # Placeholder for BEV image input, to be filled in with feed_dict;
        # PL_BEV_INPUT is the placeholder name
        bev_input_placeholder = self._add_placeholder(
            tf.float32, bev_dims, self.PL_BEV_INPUT)

        # Add a batch dimension
        self._bev_input_batches = tf.expand_dims(
            bev_input_placeholder, axis=0)

        # Resize the input; preprocessing essentially just resizes the
        # input image (some configs do not constrain the BEV size)
        self._bev_preprocessed = \
            self._bev_feature_extractor.preprocess_input(
                self._bev_input_batches, self._bev_pixel_size)

        # Summary images: split into 6 slices along the depth
        bev_summary_images = tf.split(
            bev_input_placeholder, self._bev_depth, axis=2)
        tf.summary.image("bev_maps", bev_summary_images,
                         max_outputs=self._bev_depth)

    # Image input
    with tf.variable_scope('img_input'):
        # Take variable size input images [None, None, 3]
        img_input_placeholder = self._add_placeholder(
            tf.float32,
            [None, None, self._img_depth],
            self.PL_IMG_INPUT)

        self._img_input_batches = tf.expand_dims(
            img_input_placeholder, axis=0)

        self._img_preprocessed = \
            self._img_feature_extractor.preprocess_input(
                self._img_input_batches, self._img_pixel_size)

        # Summary image
        tf.summary.image("rgb_image", self._img_preprocessed,
                         max_outputs=2)

    # Labels; the arguments are dtype, shape, name
    with tf.variable_scope('pl_labels'):
        # 6-dim anchors
        self._add_placeholder(tf.float32, [None, 6],
                              self.PL_LABEL_ANCHORS)
        # 3D boxes: x, y, z, the three dimensions, and ry
        self._add_placeholder(tf.float32, [None, 7],
                              self.PL_LABEL_BOXES_3D)
        # Label classes
        self._add_placeholder(tf.float32, [None],
                              self.PL_LABEL_CLASSES)

    # Placeholders for anchors
    with tf.variable_scope('pl_anchors'):
        self._add_placeholder(tf.float32, [None, 6],
                              self.PL_ANCHORS)
        # IoUs
        self._add_placeholder(tf.float32, [None],
                              self.PL_ANCHOR_IOUS)
        # These should be the six regression targets:
        # Δtx, Δty, Δtz, Δdx, Δdy, Δdz
        self._add_placeholder(tf.float32, [None, 6],
                              self.PL_ANCHOR_OFFSETS)
        # Classes
        self._add_placeholder(tf.float32, [None],
                              self.PL_ANCHOR_CLASSES)

    # BEV projections of the anchors
    with tf.variable_scope('bev_anchor_projections'):
        # Top-left and bottom-right corner coordinates
        self._add_placeholder(tf.float32, [None, 4],
                              self.PL_BEV_ANCHORS)
        # Normalized version
        self._bev_anchors_norm_pl = self._add_placeholder(
            tf.float32, [None, 4], self.PL_BEV_ANCHORS_NORM)

    # RGB projections of the anchors
    with tf.variable_scope('img_anchor_projections'):
        self._add_placeholder(tf.float32, [None, 4],
                              self.PL_IMG_ANCHORS)
        self._img_anchors_norm_pl = self._add_placeholder(
            tf.float32, [None, 4], self.PL_IMG_ANCHORS_NORM)

    # Sample info, read from a txt file (the calibration data)
    with tf.variable_scope('sample_info'):
        # The calib matrix shape is (3 x 4)
        self._add_placeholder(tf.float32, [3, 4], self.PL_CALIB_P2)
        self._add_placeholder(tf.int32, shape=[1],
                              name=self.PL_IMG_IDX)
        self._add_placeholder(tf.float32, [4], self.PL_GROUND_PLANE)
```
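The code above leans on the `_add_placeholder` helper, which is defined in the model base class and not shown in this excerpt. It is presumably just a thin wrapper that creates the placeholder and registers it under its key, along these lines (a sketch, not the verified implementation):

```python
import tensorflow as tf

def _add_placeholder(self, dtype, shape, name):
    # Create the placeholder and register it by name, so callers can
    # look it up later, e.g. rpn_model.placeholders[RpnModel.PL_GROUND_PLANE]
    placeholder = tf.placeholder(dtype, shape, name)
    self.placeholders[name] = placeholder
    return placeholder
```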
Summary

That covers the input side of the network: the RPN registers placeholders for the BEV and RGB inputs, the labels, the anchors with their IoUs, offsets and classes, the BEV/image anchor projections, and the calibration info; it then extracts features from both views with the modified VGG and compresses them with 1x1 bottleneck convolutions. Later posts continue with the rest of the RPN and the AVOD stage.