當前位置：首頁 > 运维知识 > Ubuntu >内容正文

Ubuntu

Ubuntu 16.04下Caffe-SSD的应用（四）——ssd_pascal.py源码解读

發布時間：2025/3/21 Ubuntu 17 豆豆

生活随笔收集整理的這篇文章主要介紹了 Ubuntu 16.04下Caffe-SSD的应用（四）——ssd_pascal.py源码解读，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

前言

caffe-ssd所有的訓練時的參數，全部由ssd_pascal.py來定義，之后再去調用相關的腳本和函數，所以想要訓練自己的數據，首先要明白ssd_pascal.py各個定義參數的大體意思。

ssd_pascal.py源碼解讀

from __future__ import print_function import caffe from caffe.model_libs import * from google.protobuf import text_formatimport math import os import shutil import stat import subprocess import sys# 給基準網(wǎng)絡后面增加額外的卷積層（為了避免此處的卷積層的名稱和基準網(wǎng)絡卷積層的名稱重復， #這里可以用基準網(wǎng)絡最后一個層的名稱進行開始命名），這一部分的具體實現(xiàn)方法可以對照文件 #~/caffe/python/caffe/model_libs.py查看，SSD的實現(xiàn)基本上就是ssd_pascal.py和model_libs.py #兩個文件在控制，剩下的則是caffe底層代碼中編寫各個功能模塊。 def AddExtraLayers(net, use_batchnorm=True):use_relu = True#生成附加網(wǎng)絡的第一個卷積層，卷積核的數(shù)量為256，卷積核的大小為1*1,pad的尺寸為0，stride為1. # 獲得基準網(wǎng)絡的最后一層，作為conv6-1層的輸入from_layer = net.keys()[-1]# TODO(weiliu89): Construct the name using the last layer to avoid duplication.out_layer = "conv6_1"#conv6_1生成完畢ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 256, 1, 0, 1)#生成附加網(wǎng)絡的第一個卷積層，卷積核的數(shù)量為512，卷積核的大小為3*3,pad的尺寸為1，stride為2.from_layer = out_layerout_layer = "conv6_2"#conv6_2生成完畢ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 512, 3, 1, 2)#conv7_1到conv9_2的生成for i in xrange(7, 9):from_layer = out_layerout_layer = "conv{}_1".format(i)ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 128, 1, 0, 1)from_layer = out_layerout_layer = "conv{}_2".format(i)ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 256, 3, 1, 2)#添加全局池層name = net.keys()[-1]net.pool6 = L.Pooling(net[name], pool=P.Pooling.AVE, global_pooling=True)return net### 相應地修改一下參數(shù) ### # 包含caffe代碼的路徑 # 假設當前路徑是在caffe跟目錄下運行代碼 caffe_root = os.getcwd() #獲取caffe的根目錄 #在生成所有訓練文件之后就開始訓練，這里run_soon給予參數(shù)Ture. 
run_soon = True
# True: continue from the most recent snapshot (e.g. after an interrupted
# run). False: ignore snapshots and load the pretrained model defined below.
resume_training = True
# True: delete older snapshot files; False: keep them.
remove_old_models = False
# Set to True to train on the CPU.
use_cpu = False

# LMDB databases produced by create_data.sh.
train_data = "data/VOC0712/trainval_lmdb"
test_data = "data/VOC0712/test_lmdb"

# Network input size (could be changed to e.g. 500x500).
resize_width = 300
resize_height = 300
resize = "{}x{}".format(resize_width, resize_height)


def _patch_sampler(constraint):
    """Return one batch-sampler entry that samples under `constraint`."""
    return {
        'sampler': {
            'min_scale': 0.3,
            'max_scale': 1.0,
            'min_aspect_ratio': 0.5,
            'max_aspect_ratio': 2.0,
        },
        'sample_constraint': constraint,
        'max_trials': 50,
        'max_sample': 1,
    }


# Batch samplers. The first entry has an empty sampler (presumably it keeps
# the original image -- confirm against the data layer); the others sample
# patches constrained by Jaccard overlap.
batch_sampler = [
    {
        'sampler': {},
        'max_trials': 1,
        'max_sample': 1,
    },
]
batch_sampler.extend(
    _patch_sampler({'min_jaccard_overlap': overlap})
    for overlap in (0.1, 0.3, 0.5, 0.7, 0.9)
)
batch_sampler.append(_patch_sampler({'max_jaccard_overlap': 1.0}))

# Interpolation modes offered to the training-time resize.
_train_interp_modes = [
    P.Resize.LINEAR,
    P.Resize.AREA,
    P.Resize.NEAREST,
    P.Resize.CUBIC,
    P.Resize.LANCZOS4,
]
train_transform_param = {
    'mirror': True,
    # Mean pixel values.
    'mean_value': [104, 117, 123],
    # Resize policy used by the data transformer.
    'resize_param': {
        # Probability of applying this resize policy.
        'prob': 1,
        # Resize mode (an enum defined in caffe.proto).
        'resize_mode': P.Resize.WARP,
        'height': resize_height,
        'width': resize_width,
        'interp_mode': _train_interp_modes,
    },
    'emit_constraint': {
        'emit_type': caffe_pb2.EmitConstraint.CENTER,
    },
}
# Test-time transform: same mean and warp resize, linear interpolation only.
test_transform_param = {
    'mean_value': [104, 117, 123],
    'resize_param': {
        'prob': 1,
        'resize_mode': P.Resize.WARP,
        'height': resize_height,
        'width': resize_width,
        'interp_mode': [P.Resize.LINEAR],
    },
}

# If True, use batch norm for all newly added layers.
# Currently only the non batch norm version has been tested.
use_batchnorm = False
# Initial learning rate; the non batch norm value is the rate for
# batch_size = 1, num_gpus = 1. It is rescaled further down the script
# according to the loss normalization mode, so this is the number to edit
# when tuning the learning rate.
base_lr = 0.0004 if use_batchnorm else 0.00004

# Job name and model name.
job_name = "SSD_{}".format(resize)
model_name = "VGG_VOC0712_{}".format(job_name)

# Directory which stores the model .prototxt files.
save_dir = "models/VGGNet/VOC0712/{}".format(job_name)
# Directory which stores the model snapshots.
snapshot_dir = "models/VGGNet/VOC0712/{}".format(job_name)
# Directory which stores the job script and log files.
job_dir = "jobs/VGGNet/VOC0712/{}".format(job_name)
# Directory which stores the detection results.
output_result_dir = "data/VOC0712/results/{}/Main".format(job_name)

# Model definition files.
train_net_file = "{}/train.prototxt".format(save_dir)
test_net_file = "{}/test.prototxt".format(save_dir)
deploy_net_file = "{}/deploy.prototxt".format(save_dir)
solver_file = "{}/solver.prototxt".format(save_dir)
# Snapshot prefix.
snapshot_prefix = "{}/{}".format(snapshot_dir, model_name)
# Job script path (a Windows batch file).
job_file = "{}/{}_train.bat".format(job_dir, model_name)

# Names and sizes of the test images (test_name_size.txt from create_list.sh).
name_size_file = "data/VOC0712/test_name_size.txt"
# Pretrained model: the fully convolutional, reduced VGGNet.
pretrain_model = "models/VGGNet/VGG_ILSVRC_16_layers_fc_reduced.caffemodel"
# Label map file.
label_map_file = "data/VOC0712/labelmap_voc.prototxt"

# Number of classes to predict: the VOC categories plus background.
num_classes = 21
# If True, bounding boxes are shared among the different classes.
share_location = True
# Label id of the background class.
background_label_id = 0
# Whether to train on "difficult" ground truth boxes.
train_on_diff_gt = True
# How the loss is normalized (enum: FULL, VALID, BATCH_SIZE, NONE).
normalization_mode = P.Loss.VALID
# Bounding box encoding (enum: CORNER, CENTER_SIZE, CORNER_SIZE).
code_type = P.PriorBox.CENTER_SIZE
# Negative / positive ratio (the 3:1 ratio from the paper).
neg_pos_ratio = 3.
# Weight of the localization loss.
loc_weight = (neg_pos_ratio + 1.) / 4.
# Parameters consumed by the MultiBoxLoss layer.
multibox_loss_param = {
    # Localization loss type (L2 or SMOOTH_L1).
    'loc_loss_type': P.MultiBoxLoss.SMOOTH_L1,
    # Confidence loss type (SOFTMAX or LOGISTIC).
    'conf_loss_type': P.MultiBoxLoss.SOFTMAX,
    'loc_weight': loc_weight,
    'num_classes': num_classes,
    'share_location': share_location,
    # Matching method (BIPARTITE or PER_PREDICTION). With PER_PREDICTION,
    # overlap_threshold decides the additional matches.
    'match_type': P.MultiBoxLoss.PER_PREDICTION,
    # IoU threshold used for matching.
    'overlap_threshold': 0.5,
    'use_prior_for_matching': True,
    'background_label_id': background_label_id,
    'use_difficult_gt': train_on_diff_gt,
    'do_neg_mining': True,
    'neg_pos_ratio': neg_pos_ratio,
    # Upper overlap bound for a prediction to count as a negative.
    'neg_overlap': 0.5,
    'code_type': code_type,
}
# Parameters shared by loss layers.
loss_param = {
    'normalization': normalization_mode,
}

# Parameters for generating priors.
# Minimum dimension of the input image.
min_dim = 300
# conv4_3 ==> 38 x 38
# fc7 ==> 19 x 19
# conv6_2 ==> 10 x 10
# conv7_2 ==> 5 x 5
# conv8_2 ==> 3 x 3
# pool6 ==> 1 x 1
# Layers that prior boxes are generated from.
mbox_source_layers = ['conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'pool6']
# Scale range in percent of min_dim.
min_ratio = 20
max_ratio = 95
# Spacing between consecutive ratios; (95 - 20) // 4 == 18 with the values
# above. Floor division keeps the result identical on Python 2 and 3.
step = int(math.floor((max_ratio - min_ratio) // (len(mbox_source_layers) - 2)))
min_sizes = []
max_sizes = []
for ratio in range(min_ratio, max_ratio + 1, step):
    min_sizes.append(min_dim * ratio / 100.)
    max_sizes.append(min_dim * (ratio + step) / 100.)
# The first source layer gets a smaller prior (10% of min_dim) and no
# max size.
min_sizes = [min_dim * 10 / 100.] + min_sizes
max_sizes = [[]] + max_sizes
# Aspect ratios, one list per source layer.
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
# One value per source layer; a value other than -1 adds a Normalize layer
# for that layer (here only conv4_3, with scale 20).
normalizations = [20, -1, -1, -1, -1, -1]
# Variance used to encode/decode prior bboxes; it depends on code_type.
# The source had lost this `if` line, leaving a dangling `else`.
if code_type == P.PriorBox.CENTER_SIZE:
    prior_variance = [0.1, 0.1, 0.2, 0.2]
else:
    prior_variance = [0.1]
# If True, also generate the flipped aspect ratio 1/r for each ratio r.
flip = True
# If True, clip the prior boxes to stay within [0, 1].
clip = True

# Solver parameters.
# Defining which GPUs to use.
gpus = "0"  # e.g. "0,1,2,3" for multiple GPUs
gpulist = gpus.split(",")
num_gpus = len(gpulist)

# `use_cpu` is the flag defined with the other run options near the top of
# the script. The source reassigned it to False here, which silently
# overrode that setting; the reassignment was dropped.
if use_cpu:
    num_gpus = 0

# Divide the mini-batch among the GPUs.
batch_size = 2  # 32
accum_batch_size = 32
# Number of forward/backward passes accumulated per weight update.
# Floor division keeps this an int on Python 3 as well.
iter_size = accum_batch_size // batch_size
solver_mode = P.Solver.CPU
device_id = 0
batch_size_per_device = batch_size
if num_gpus > 0:
    batch_size_per_device = int(math.ceil(float(batch_size) / num_gpus))
    iter_size = int(math.ceil(float(accum_batch_size) / (batch_size_per_device * num_gpus)))
    solver_mode = P.Solver.GPU
    device_id = int(gpulist[0])

# Rescale the base learning rate according to the loss normalization mode.
# With VALID and loc_weight == 1 the non batch norm rate becomes
# 0.00004 * 25 = 0.001, the value written to solver.prototxt.
if normalization_mode == P.Loss.NONE:
    base_lr /= batch_size_per_device
elif normalization_mode == P.Loss.VALID:
    base_lr *= 25. / loc_weight
elif normalization_mode == P.Loss.FULL:
    # Roughly there are 2000 prior bboxes per image.
    # TODO(weiliu89): Estimate the exact # of priors.
    base_lr *= 2000.

# Which layers to freeze (no backward) during training.
freeze_layers = ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2']

# Evaluate on the whole test set.
num_test_image = 4952
test_batch_size = 1
# Ideally test_batch_size should divide num_test_image, otherwise mAP will
# be slightly off the true value. Floor division keeps test_iter an int on
# Python 3 as well.
test_iter = num_test_image // test_batch_size

# Values written to solver.prototxt.
solver_param = {
    # Train parameters.
    'base_lr': base_lr,
    'weight_decay': 0.0005,
    'lr_policy': "step",
    # With the "step" policy the learning rate is multiplied by gamma every
    # `stepsize` iterations.
    'stepsize': 40000,
    'gamma': 0.1,
    'momentum': 0.9,
    # Number of passes accumulated before each weight update.
    'iter_size': iter_size,
    'max_iter': 60000,
    # Save a snapshot every 40000 iterations.
    'snapshot': 40000,
    # Log every 10 iterations, averaging the loss over the last 10.
    'display': 10,
    'average_loss': 10,
    'type': "SGD",
    'solver_mode': solver_mode,
    # Id of the device to run on (first entry of the GPU list). The source
    # was missing the comma after this entry.
    'device_id': device_id,
    'debug_info': False,
    # Also save a snapshot when training finishes.
    'snapshot_after_train': True,
    # Test parameters.
    'test_iter': [test_iter],
    'test_interval': 10000,
    'eval_type': "detection",
    'ap_version': "11point",
    # Do not run a test pass before the first training iteration.
    'test_initialization': False,
}

# Parameters for generating detection output.
det_out_param = {
    'num_classes': num_classes,
    'share_location': share_location,
    'background_label_id': background_label_id,
    # Non-maximum suppression: IoU threshold and number of boxes kept.
    'nms_param': {'nms_threshold': 0.45, 'top_k': 400},
    # Where and how detection results are saved.
    'save_output_param': {
        'output_directory': output_result_dir,
        'output_name_prefix': "comp4_det_test_",
        'output_format': "VOC",
        'label_map_file': label_map_file,
        'name_size_file': name_size_file,
        'num_test_image': num_test_image,
    },
    # Number of boxes kept per image after NMS.
    'keep_top_k': 200,
    # Only consider detections whose confidence exceeds this threshold.
    'confidence_threshold': 0.01,
    'code_type': code_type,
}

# Parameters for evaluating detection results.
det_eval_param = {
    'num_classes': num_classes,
    'background_label_id': background_label_id,
    'overlap_threshold': 0.5,
    'evaluate_difficult_gt': False,
    'name_size_file': name_size_file,
}

### Hopefully you don't need to change the following ###
# Check that the required inputs exist and create the output directories.
check_if_exist(train_data)
check_if_exist(test_data)
check_if_exist(label_map_file)
check_if_exist(pretrain_model)
make_if_not_exist(save_dir)
make_if_not_exist(job_dir)
make_if_not_exist(snapshot_dir)

# Create the train net.
net = caffe.NetSpec()
net.data, net.label = CreateAnnotatedDataLayer(
    train_data, batch_size=batch_size_per_device,
    train=True, output_label=True, label_map_file=label_map_file,
    transform_param=train_transform_param, batch_sampler=batch_sampler)

# Base network: the truncated VGG body fed from the 'data' layer, built
# fully convolutional / reduced / dilated and without dropout.
VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True, dilated=True,
           dropout=False, freeze_layers=freeze_layers)

# Extra feature layers (conv6_1, conv6_2, ...).
AddExtraLayers(net, use_batchnorm)

# Prior box, localization and confidence heads for each source layer.
mbox_layers = CreateMultiBoxHead(
    net, data_layer='data', from_layers=mbox_source_layers,
    use_batchnorm=use_batchnorm, min_sizes=min_sizes, max_sizes=max_sizes,
    aspect_ratios=aspect_ratios, normalizations=normalizations,
    num_classes=num_classes, share_location=share_location, flip=flip, clip=clip,
    prior_variance=prior_variance, kernel_size=3, pad=1)

# Create the MultiBoxLossLayer (train phase only).
name = "mbox_loss"
mbox_layers.append(net.label)
# propagate_down has one flag per bottom blob: here gradients flow to the
# first two bottoms only.
net[name] = L.MultiBoxLoss(
    *mbox_layers, multibox_loss_param=multibox_loss_param,
    loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
    propagate_down=[True, True, False, False])

with open(train_net_file, 'w') as f:
    print('name: "{}_train"'.format(model_name), file=f)
    print(net.to_proto(), file=f)
shutil.copy(train_net_file, job_dir)

# Create the test net; the body is built the same way as the train net.
net = caffe.NetSpec()
net.data, net.label = CreateAnnotatedDataLayer(
    test_data, batch_size=test_batch_size,
    train=False, output_label=True, label_map_file=label_map_file,
    transform_param=test_transform_param)

VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True, dilated=True,
           dropout=False, freeze_layers=freeze_layers)

AddExtraLayers(net, use_batchnorm)

mbox_layers = CreateMultiBoxHead(
    net, data_layer='data', from_layers=mbox_source_layers,
    use_batchnorm=use_batchnorm, min_sizes=min_sizes, max_sizes=max_sizes,
    aspect_ratios=aspect_ratios, normalizations=normalizations,
    num_classes=num_classes, share_location=share_location, flip=flip, clip=clip,
    prior_variance=prior_variance, kernel_size=3, pad=1)

# Turn the raw confidence output into scores for the detection layer.
conf_name = "mbox_conf"
if multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.SOFTMAX:
    reshape_name = "{}_reshape".format(conf_name)
    net[reshape_name] = L.Reshape(net[conf_name], shape=dict(dim=[0, -1, num_classes]))
    softmax_name = "{}_softmax".format(conf_name)
    net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
    flatten_name = "{}_flatten".format(conf_name)
    net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
    mbox_layers[1] = net[flatten_name]
elif multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.LOGISTIC:
    sigmoid_name = "{}_sigmoid".format(conf_name)
    net[sigmoid_name] = L.Sigmoid(net[conf_name])
    mbox_layers[1] = net[sigmoid_name]

# Detection output and evaluation layers exist only in the test net.
net.detection_out = L.DetectionOutput(
    *mbox_layers,
    detection_output_param=det_out_param,
    include=dict(phase=caffe_pb2.Phase.Value('TEST')))
net.detection_eval = L.DetectionEvaluate(
    net.detection_out, net.label,
    detection_evaluate_param=det_eval_param,
    include=dict(phase=caffe_pb2.Phase.Value('TEST')))

with open(test_net_file, 'w') as f:
    print('name: "{}_test"'.format(model_name), file=f)
    print(net.to_proto(), file=f)
shutil.copy(test_net_file, job_dir)

# Create the deploy net from the test net.
deploy_net = net
with open(deploy_net_file, 'w') as f:
    net_param = deploy_net.to_proto()
    # Remove the first (AnnotatedData) and last (DetectionEvaluate) layer
    # from the test net.
    del net_param.layer[0]
    del net_param.layer[-1]
    net_param.name = '{}_deploy'.format(model_name)
    # Declare the 'data' input and its shape: [1, 3, height, width].
    net_param.input.extend(['data'])
    net_param.input_shape.extend([
        caffe_pb2.BlobShape(dim=[1, 3, resize_height, resize_width])])
    print(net_param, file=f)
shutil.copy(deploy_net_file, job_dir)

# Create the solver.
solver = caffe_pb2.SolverParameter(
    train_net=train_net_file,
    test_net=[test_net_file],
    snapshot_prefix=snapshot_prefix,
    **solver_param)

with open(solver_file, 'w') as f:
    print(solver, file=f)
shutil.copy(solver_file, job_dir)


def _snapshot_iteration(filename):
    """Return the iteration encoded in a snapshot file name, or None.

    Snapshot files are named ``<model_name>_iter_<N>.<ext>``; files in the
    snapshot directory that do not follow this pattern yield None instead
    of raising.
    """
    stem = os.path.splitext(filename)[0]
    parts = stem.split("{}_iter_".format(model_name))
    if len(parts) < 2:
        return None
    try:
        return int(parts[1])
    except ValueError:
        return None


# Find the most recent snapshot so interrupted training can be resumed.
max_iter = 0
for snapshot_file in os.listdir(snapshot_dir):
    if snapshot_file.endswith(".solverstate"):
        iteration = _snapshot_iteration(snapshot_file)
        if iteration is not None and iteration > max_iter:
            max_iter = iteration

# Start from the pretrained weights, or from the latest snapshot when
# resuming and one exists.
train_src_param = ''
if os.path.isfile(pretrain_model):
    train_src_param = '\t--weights={} ^\n'.format(os.path.normpath(pretrain_model))
if resume_training:
    if max_iter > 0:
        train_src_param = '\t--snapshot={}_iter_{}.solverstate ^\n'.format(
            os.path.normpath(snapshot_prefix), max_iter)

# Remove any snapshots older than the latest one.
if remove_old_models:
    for snapshot_file in os.listdir(snapshot_dir):
        if snapshot_file.endswith((".solverstate", ".caffemodel")):
            iteration = _snapshot_iteration(snapshot_file)
            if iteration is not None and max_iter > iteration:
                os.remove("{}/{}".format(snapshot_dir, snapshot_file))

# Create the job file: a Windows batch script that launches caffe and pipes
# the output into a timestamped log. Backslashes are escaped explicitly; the
# bytes written are the same as before.
with open(job_file, 'w') as f:
    f.write('SET GLOG_logtostderr=1\n')
    f.write('set Datum=%DATE:~6,4%_%DATE:~3,2%_%DATE:~0,2%\n')
    f.write('set Uhrzeit=%TIME:~0,2%_%TIME:~3,2%_%TIME:~6,2%\n')
    f.write('set TIMESTAMP=%Datum%_%Uhrzeit%\n')
    f.write('\n')
    f.write('cd {}\n'.format(caffe_root))
    f.write('"Build\\{}\\Release\\caffe" train ^\n'.format('x64'))
    f.write('\t--solver={} ^\n'.format(os.path.normpath(solver_file)))
    f.write(train_src_param)
    if solver_param['solver_mode'] == P.Solver.GPU:
        f.write('\t--gpu {} 2>&1 | "tools\\mtee" "{}\\{}-train-%TIMESTAMP%.log"\n'.format(
            gpus, os.path.normpath(job_dir), model_name))
    else:
        f.write('\t2>&1 | "tools\\mtee" "{}\\{}-train-%TIMESTAMP%.log"\n'.format(
            os.path.normpath(job_dir), model_name))

# Copy this script to the job directory.
py_file = os.path.abspath(__file__)
shutil.copy(py_file, job_dir)

# Run the job.
os.chmod(job_file, stat.S_IRWXU)
if run_soon:
    subprocess.call(os.path.normpath(job_file), shell=True)

結語

1.以上是關于ssd_pascal.py源碼的注釋。
2.關于ssd_pascal.py源碼理解，都可以加這個群(487350510)互相討論學習。

總結

以上是生活随笔為你收集整理的Ubuntu 16.04下Caffe-SSD的应用（四）——ssd_pascal.py源码解读的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： Windows7下Caffe-SSD的应
下一篇： Ubuntu 16.04下Caffe-S