yolov3网络结构笔记
生活随笔
收集整理的這篇文章主要介紹了
yolov3网络结构笔记
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
從yolo3_darknet53_coco(gluoncv/model_zoo/yolo/yolo3.py)調試獲得,供參考
--------------------------------------------------------------------------------
Layer (type)  Output Shape  Param #
================================================================================
Input  (1, 3, 512, 703)  0
Conv2D-1  (1, 32, 512, 703)  864
BatchNorm-2  (1, 32, 512, 703)  128
LeakyReLU-3  (1, 32, 512, 703)  0
Conv2D-4  (1, 64, 256, 352)  18432
BatchNorm-5  (1, 64, 256, 352)  256
LeakyReLU-6  (1, 64, 256, 352)  0
Conv2D-7  (1, 32, 256, 352)  2048
BatchNorm-8  (1, 32, 256, 352)  128
LeakyReLU-9  (1, 32, 256, 352)  0
Conv2D-10  (1, 64, 256, 352)  18432
BatchNorm-11  (1, 64, 256, 352)  256
LeakyReLU-12  (1, 64, 256, 352)  0
DarknetBasicBlockV3-13  (1, 64, 256, 352)  0
Conv2D-14  (1, 128, 128, 176)  73728
BatchNorm-15  (1, 128, 128, 176)  512
LeakyReLU-16  (1, 128, 128, 176)  0
Conv2D-17  (1, 64, 128, 176)  8192
BatchNorm-18  (1, 64, 128, 176)  256
LeakyReLU-19  (1, 64, 128, 176)  0
Conv2D-20  (1, 128, 128, 176)  73728
BatchNorm-21  (1, 128, 128, 176)  512
LeakyReLU-22  (1, 128, 128, 176)  0
DarknetBasicBlockV3-23  (1, 128, 128, 176)  0
Conv2D-24  (1, 64, 128, 176)  8192
BatchNorm-25  (1, 64, 128, 176)  256
LeakyReLU-26  (1, 64, 128, 176)  0
Conv2D-27  (1, 128, 128, 176)  73728
BatchNorm-28  (1, 128, 128, 176)  512
LeakyReLU-29  (1, 128, 128, 176)  0
DarknetBasicBlockV3-30  (1, 128, 128, 176)  0
Conv2D-31  (1, 256, 64, 88)  294912
BatchNorm-32  (1, 256, 64, 88)  1024
LeakyReLU-33  (1, 256, 64, 88)  0
Conv2D-34  (1, 128, 64, 88)  32768
BatchNorm-35  (1, 128, 64, 88)  512
LeakyReLU-36  (1, 128, 64, 88)  0
Conv2D-37  (1, 256, 64, 88)  294912
BatchNorm-38  (1, 256, 64, 88)  1024
LeakyReLU-39  (1, 256, 64, 88)  0
DarknetBasicBlockV3-40  (1, 256, 64, 88)  0
Conv2D-41  (1, 128, 64, 88)  32768
BatchNorm-42  (1, 128, 64, 88)  512
LeakyReLU-43  (1, 128, 64, 88)  0
Conv2D-44  (1, 256, 64, 88)  294912
BatchNorm-45  (1, 256, 64, 88)  1024
LeakyReLU-46  (1, 256, 64, 88)  0
DarknetBasicBlockV3-47  (1, 256, 64, 88)  0
Conv2D-48  (1, 128, 64, 88)  32768
BatchNorm-49  (1, 128, 64, 88)  512
LeakyReLU-50  (1, 128, 64, 88)  0
Conv2D-51  (1, 256, 64, 88)  294912
BatchNorm-52  (1, 256, 64, 88)  1024
LeakyReLU-53  (1, 256, 64, 88)  0
DarknetBasicBlockV3-54  (1, 256, 64, 88)  0
Conv2D-55  (1, 128, 64, 88)  32768
BatchNorm-56  (1, 128, 64, 88)  512
LeakyReLU-57  (1, 128, 64, 88)  0
Conv2D-58  (1, 256, 64, 88)  294912
BatchNorm-59  (1, 256, 64, 88)  1024
LeakyReLU-60  (1, 256, 64, 88)  0
DarknetBasicBlockV3-61  (1, 256, 64, 88)  0
Conv2D-62  (1, 128, 64, 88)  32768
BatchNorm-63  (1, 128, 64, 88)  512
LeakyReLU-64  (1, 128, 64, 88)  0
Conv2D-65  (1, 256, 64, 88)  294912
BatchNorm-66  (1, 256, 64, 88)  1024
LeakyReLU-67  (1, 256, 64, 88)  0
DarknetBasicBlockV3-68  (1, 256, 64, 88)  0
Conv2D-69  (1, 128, 64, 88)  32768
BatchNorm-70  (1, 128, 64, 88)  512
LeakyReLU-71  (1, 128, 64, 88)  0
Conv2D-72  (1, 256, 64, 88)  294912
BatchNorm-73  (1, 256, 64, 88)  1024
LeakyReLU-74  (1, 256, 64, 88)  0
DarknetBasicBlockV3-75  (1, 256, 64, 88)  0
Conv2D-76  (1, 128, 64, 88)  32768
BatchNorm-77  (1, 128, 64, 88)  512
LeakyReLU-78  (1, 128, 64, 88)  0
Conv2D-79  (1, 256, 64, 88)  294912
BatchNorm-80  (1, 256, 64, 88)  1024
LeakyReLU-81  (1, 256, 64, 88)  0
DarknetBasicBlockV3-82  (1, 256, 64, 88)  0
Conv2D-83  (1, 128, 64, 88)  32768
BatchNorm-84  (1, 128, 64, 88)  512
LeakyReLU-85  (1, 128, 64, 88)  0
Conv2D-86  (1, 256, 64, 88)  294912
BatchNorm-87  (1, 256, 64, 88)  1024
LeakyReLU-88  (1, 256, 64, 88)  0
DarknetBasicBlockV3-89  (1, 256, 64, 88)  0
darknet53.features[:15]
--------------------------------------------------------------------------------------------------
Conv2D-90  (1, 512, 32, 44)  1179648
BatchNorm-91  (1, 512, 32, 44)  2048
LeakyReLU-92  (1, 512, 32, 44)  0
Conv2D-93  (1, 256, 32, 44)  131072
BatchNorm-94  (1, 256, 32, 44)  1024
LeakyReLU-95  (1, 256, 32, 44)  0
Conv2D-96  (1, 512, 32, 44)  1179648
BatchNorm-97  (1, 512, 32, 44)  2048
LeakyReLU-98  (1, 512, 32, 44)  0
DarknetBasicBlockV3-99  (1, 512, 32, 44)  0
Conv2D-100  (1, 256, 32, 44)  131072
BatchNorm-101  (1, 256, 32, 44)  1024
LeakyReLU-102  (1, 256, 32, 44)  0
Conv2D-103  (1, 512, 32, 44)  1179648
BatchNorm-104  (1, 512, 32, 44)  2048
LeakyReLU-105  (1, 512, 32, 44)  0
DarknetBasicBlockV3-106  (1, 512, 32, 44)  0
Conv2D-107  (1, 256, 32, 44)  131072
BatchNorm-108  (1, 256, 32, 44)  1024
LeakyReLU-109  (1, 256, 32, 44)  0
Conv2D-110  (1, 512, 32, 44)  1179648
BatchNorm-111  (1, 512, 32, 44)  2048
LeakyReLU-112  (1, 512, 32, 44)  0
DarknetBasicBlockV3-113  (1, 512, 32, 44)  0
Conv2D-114  (1, 256, 32, 44)  131072
BatchNorm-115  (1, 256, 32, 44)  1024
LeakyReLU-116  (1, 256, 32, 44)  0
Conv2D-117  (1, 512, 32, 44)  1179648
BatchNorm-118  (1, 512, 32, 44)  2048
LeakyReLU-119  (1, 512, 32, 44)  0
DarknetBasicBlockV3-120  (1, 512, 32, 44)  0
Conv2D-121  (1, 256, 32, 44)  131072
BatchNorm-122  (1, 256, 32, 44)  1024
LeakyReLU-123  (1, 256, 32, 44)  0
Conv2D-124  (1, 512, 32, 44)  1179648
BatchNorm-125  (1, 512, 32, 44)  2048
LeakyReLU-126  (1, 512, 32, 44)  0
DarknetBasicBlockV3-127  (1, 512, 32, 44)  0
Conv2D-128  (1, 256, 32, 44)  131072
BatchNorm-129  (1, 256, 32, 44)  1024
LeakyReLU-130  (1, 256, 32, 44)  0
Conv2D-131  (1, 512, 32, 44)  1179648
BatchNorm-132  (1, 512, 32, 44)  2048
LeakyReLU-133  (1, 512, 32, 44)  0
DarknetBasicBlockV3-134  (1, 512, 32, 44)  0
Conv2D-135  (1, 256, 32, 44)  131072
BatchNorm-136  (1, 256, 32, 44)  1024
LeakyReLU-137  (1, 256, 32, 44)  0
Conv2D-138  (1, 512, 32, 44)  1179648
BatchNorm-139  (1, 512, 32, 44)  2048
LeakyReLU-140  (1, 512, 32, 44)  0
DarknetBasicBlockV3-141  (1, 512, 32, 44)  0
Conv2D-142  (1, 256, 32, 44)  131072
BatchNorm-143  (1, 256, 32, 44)  1024
LeakyReLU-144  (1, 256, 32, 44)  0
Conv2D-145  (1, 512, 32, 44)  1179648
BatchNorm-146  (1, 512, 32, 44)  2048
LeakyReLU-147  (1, 512, 32, 44)  0
DarknetBasicBlockV3-148  (1, 512, 32, 44)  0
darknet53.features[15:24]
--------------------------------------------------------------------------------------------------
Conv2D-149  (1, 1024, 16, 22)  4718592
BatchNorm-150  (1, 1024, 16, 22)  4096
LeakyReLU-151  (1, 1024, 16, 22)  0
Conv2D-152  (1, 512, 16, 22)  524288
BatchNorm-153  (1, 512, 16, 22)  2048
LeakyReLU-154  (1, 512, 16, 22)  0
Conv2D-155  (1, 1024, 16, 22)  4718592
BatchNorm-156  (1, 1024, 16, 22)  4096
LeakyReLU-157  (1, 1024, 16, 22)  0
DarknetBasicBlockV3-158  (1, 1024, 16, 22)  0
Conv2D-159  (1, 512, 16, 22)  524288
BatchNorm-160  (1, 512, 16, 22)  2048
LeakyReLU-161  (1, 512, 16, 22)  0
Conv2D-162  (1, 1024, 16, 22)  4718592
BatchNorm-163  (1, 1024, 16, 22)  4096
LeakyReLU-164  (1, 1024, 16, 22)  0
DarknetBasicBlockV3-165  (1, 1024, 16, 22)  0
Conv2D-166  (1, 512, 16, 22)  524288
BatchNorm-167  (1, 512, 16, 22)  2048
LeakyReLU-168  (1, 512, 16, 22)  0
Conv2D-169  (1, 1024, 16, 22)  4718592
BatchNorm-170  (1, 1024, 16, 22)  4096
LeakyReLU-171  (1, 1024, 16, 22)  0
DarknetBasicBlockV3-172  (1, 1024, 16, 22)  0
Conv2D-173  (1, 512, 16, 22)  524288
BatchNorm-174  (1, 512, 16, 22)  2048
LeakyReLU-175  (1, 512, 16, 22)  0
Conv2D-176  (1, 1024, 16, 22)  4718592
BatchNorm-177  (1, 1024, 16, 22)  4096
LeakyReLU-178  (1, 1024, 16, 22)  0
DarknetBasicBlockV3-179  (1, 1024, 16, 22)  0
darknet53.features[24:]
--------------------------------------------------------------------------------------------------
Conv2D-180  (1, 512, 16, 22)  524288
BatchNorm-181  (1, 512, 16, 22)  2048
LeakyReLU-182  (1, 512, 16, 22)  0
Conv2D-183  (1, 1024, 16, 22)  4718592
BatchNorm-184  (1, 1024, 16, 22)  4096
LeakyReLU-185  (1, 1024, 16, 22)  0
Conv2D-186  (1, 512, 16, 22)  524288
BatchNorm-187  (1, 512, 16, 22)  2048
LeakyReLU-188  (1, 512, 16, 22)  0
Conv2D-189  (1, 1024, 16, 22)  4718592
BatchNorm-190  (1, 1024, 16, 22)  4096
LeakyReLU-191  (1, 1024, 16, 22)  0
Conv2D-192  (1, 512, 16, 22)  524288
BatchNorm-193  (1, 512, 16, 22)  2048
LeakyReLU-194  (1, 512, 16, 22)  0
YOLODetectionBlockV3.body
--------------------------------------------------------------------------------------------------
Conv2D-195  (1, 1024, 16, 22)  4718592
BatchNorm-196  (1, 1024, 16, 22)  4096
LeakyReLU-197  (1, 1024, 16, 22)  0
YOLODetectionBlockV3.tip
--------------------------------------------------------------------------------------------------
YOLODetectionBlockV3-198  (1, 512, 16, 22), (1, 1024, 16, 22)  0
--------------------------------------------------------------------------------------------------
Conv2D-199  (1, 255, 16, 22)  261375
YOLOOutputV3.prediction
--------------------------------------------------------------------------------------------------
YOLOOutputV3-200  (1, 84480, 6)  32774
YOLOOutputV3
--------------------------------------------------------------------------------------------------
Conv2D-201  (1, 256, 16, 22)  131072
BatchNorm-202  (1, 256, 16, 22)  1024
LeakyReLU-203  (1, 256, 16, 22)  0
YOLOV3.transitions[0]----->_conv2d
--------------------------------------------------------------------------------------------------
Conv2D-204  (1, 256, 32, 44)  196608
BatchNorm-205  (1, 256, 32, 44)  1024
LeakyReLU-206  (1, 256, 32, 44)  0
Conv2D-207  (1, 512, 32, 44)  1179648
BatchNorm-208  (1, 512, 32, 44)  2048
LeakyReLU-209  (1, 512, 32, 44)  0
Conv2D-210  (1, 256, 32, 44)  131072
BatchNorm-211  (1, 256, 32, 44)  1024
LeakyReLU-212  (1, 256, 32, 44)  0
Conv2D-213  (1, 512, 32, 44)  1179648
BatchNorm-214  (1, 512, 32, 44)  2048
LeakyReLU-215  (1, 512, 32, 44)  0
Conv2D-216  (1, 256, 32, 44)  131072
BatchNorm-217  (1, 256, 32, 44)  1024
LeakyReLU-218  (1, 256, 32, 44)  0
YOLODetectionBlockV3.body
--------------------------------------------------------------------------------------------------
Conv2D-219  (1, 512, 32, 44)  1179648
BatchNorm-220  (1, 512, 32, 44)  2048
LeakyReLU-221  (1, 512, 32, 44)  0
YOLODetectionBlockV3.tip
--------------------------------------------------------------------------------------------------
YOLODetectionBlockV3-222  (1, 256, 32, 44), (1, 512, 32, 44)  0
--------------------------------------------------------------------------------------------------
Conv2D-223  (1, 255, 32, 44)  130815
YOLOOutputV3.prediction
--------------------------------------------------------------------------------------------------
YOLOOutputV3-224  (1, 337920, 6)  32774
YOLOOutputV3
--------------------------------------------------------------------------------------------------
Conv2D-225  (1, 128, 32, 44)  32768
BatchNorm-226  (1, 128, 32, 44)  512
LeakyReLU-227  (1, 128, 32, 44)  0
YOLOV3.transitions[1]----->_conv2d
--------------------------------------------------------------------------------------------------
Conv2D-228  (1, 128, 64, 88)  49152
BatchNorm-229  (1, 128, 64, 88)  512
LeakyReLU-230  (1, 128, 64, 88)  0
Conv2D-231  (1, 256, 64, 88)  294912
BatchNorm-232  (1, 256, 64, 88)  1024
LeakyReLU-233  (1, 256, 64, 88)  0
Conv2D-234  (1, 128, 64, 88)  32768
BatchNorm-235  (1, 128, 64, 88)  512
LeakyReLU-236  (1, 128, 64, 88)  0
Conv2D-237  (1, 256, 64, 88)  294912
BatchNorm-238  (1, 256, 64, 88)  1024
LeakyReLU-239  (1, 256, 64, 88)  0
Conv2D-240  (1, 128, 64, 88)  32768
BatchNorm-241  (1, 128, 64, 88)  512
LeakyReLU-242  (1, 128, 64, 88)  0
YOLODetectionBlockV3.body
--------------------------------------------------------------------------------------------------
Conv2D-243  (1, 256, 64, 88)  294912
BatchNorm-244  (1, 256, 64, 88)  1024
LeakyReLU-245  (1, 256, 64, 88)  0
YOLODetectionBlockV3.tip
--------------------------------------------------------------------------------------------------
YOLODetectionBlockV3-246  (1, 128, 64, 88), (1, 256, 64, 88)  0
--------------------------------------------------------------------------------------------------
Conv2D-247  (1, 255, 64, 88)  65535
YOLOOutputV3.prediction
--------------------------------------------------------------------------------------------------
YOLOOutputV3-248  (1, 1351680, 6)  32774
YOLOOutputV3
--------------------------------------------------------------------------------------------------
YOLOV3-249  (1, 100, 1), (1, 100, 1), (1, 100, 4)  0
================================================================================
打印日志如下,有用的沒用的都放這裡了
darknet_version = v3
num_layers = 53
darknet layers = [1, 2, 8, 8, 4]
darknet channels = [32, 64, 128, 256, 512, 1024]
anchors = [116. 90. 156. 198. 373. 326.]
self._num_pred = 85
self._num_anchors = 3
all_pred = 255
anchors = [[[[116. 90.][156. 198.][373. 326.]]]]
self.anchors =
[[[[116. 90.][156. 198.][373. 326.]]]]
<NDArray 1x1x3x2 @cpu(0)>
anchors = [ 30. 61. 62. 45. 59. 119.]
self._num_pred = 85
self._num_anchors = 3
all_pred = 255
anchors = [[[[ 30. 61.][ 62. 45.][ 59. 119.]]]]
self.anchors =
[[[[ 30. 61.][ 62. 45.][ 59. 119.]]]]
<NDArray 1x1x3x2 @cpu(0)>
anchors = [10. 13. 16. 30. 33. 23.]
self._num_pred = 85
self._num_anchors = 3
all_pred = 255
anchors = [[[[10. 13.][16. 30.][33. 23.]]]]
self.anchors =
[[[[10. 13.][16. 30.][33. 23.]]]]
<NDArray 1x1x3x2 @cpu(0)>
Shape of pre-processed image: (1, 3, 512, 703)
x.shape = (1, 256, 64, 88)
x.shape = (1, 512, 32, 44)
x.shape = (1, 1024, 16, 22)
pred.shape = (1, 255, 16, 22)
pred.shape = (1, 255, 352)
pred.shape = (1, 352, 3, 85)
raw_box_centers.shape = (1, 352, 3, 2)
raw_box_scales.shape = (1, 352, 3, 2)
objness.shape = (1, 352, 3, 1)
class_pred.shape = (1, 352, 3, 80)
offsets.shape = (1, 1, 128, 128, 2)
slice_like offsets.shape = (1, 1, 16, 22, 2)
reshape offsets.shape = (1, 352, 1, 2)
box_centers.shape = (1, 352, 3, 2)
anchors.shape = (1, 1, 3, 2)
box_scales.shape = (1, 352, 3, 2)
confidence.shape = (1, 352, 3, 1)
class_score.shape = (1, 352, 3, 80)
bbox.shape = (1, 352, 3, 4)
bboxes.shape = (80, 1, 352, 3, 4)
scores.shape = (80, 1, 352, 3, 1)
ids.shape = (80, 1, 352, 3, 1)
detections = (80, 1, 352, 3, 6)
detections = (1, 84480, 6)
dets.shape = (1, 84480, 6)
self.transitions = <class 'mxnet.gluon.nn.basic_layers.HybridSequential'>
x.shape = (1, 256, 16, 22)
upsample.shape = (1, 256, 32, 44)
route_now.shape = (1, 512, 32, 44)
x.shape = (1, 768, 32, 44)
pred.shape = (1, 255, 32, 44)
pred.shape = (1, 255, 1408)
pred.shape = (1, 1408, 3, 85)
raw_box_centers.shape = (1, 1408, 3, 2)
raw_box_scales.shape = (1, 1408, 3, 2)
objness.shape = (1, 1408, 3, 1)
class_pred.shape = (1, 1408, 3, 80)
offsets.shape = (1, 1, 128, 128, 2)
slice_like offsets.shape = (1, 1, 32, 44, 2)
reshape offsets.shape = (1, 1408, 1, 2)
box_centers.shape = (1, 1408, 3, 2)
anchors.shape = (1, 1, 3, 2)
box_scales.shape = (1, 1408, 3, 2)
confidence.shape = (1, 1408, 3, 1)
class_score.shape = (1, 1408, 3, 80)
bbox.shape = (1, 1408, 3, 4)
bboxes.shape = (80, 1, 1408, 3, 4)
scores.shape = (80, 1, 1408, 3, 1)
ids.shape = (80, 1, 1408, 3, 1)
detections = (80, 1, 1408, 3, 6)
detections = (1, 337920, 6)
dets.shape = (1, 337920, 6)
self.transitions = <class 'mxnet.gluon.nn.basic_layers.HybridSequential'>
x.shape = (1, 128, 32, 44)
upsample.shape = (1, 128, 64, 88)
route_now.shape = (1, 256, 64, 88)
x.shape = (1, 384, 64, 88)
pred.shape = (1, 255, 64, 88)
pred.shape = (1, 255, 5632)
pred.shape = (1, 5632, 3, 85)
raw_box_centers.shape = (1, 5632, 3, 2)
raw_box_scales.shape = (1, 5632, 3, 2)
objness.shape = (1, 5632, 3, 1)
class_pred.shape = (1, 5632, 3, 80)
offsets.shape = (1, 1, 128, 128, 2)
slice_like offsets.shape = (1, 1, 64, 88, 2)
reshape offsets.shape = (1, 5632, 1, 2)
box_centers.shape = (1, 5632, 3, 2)
anchors.shape = (1, 1, 3, 2)
box_scales.shape = (1, 5632, 3, 2)
confidence.shape = (1, 5632, 3, 1)
class_score.shape = (1, 5632, 3, 80)
bbox.shape = (1, 5632, 3, 4)
bboxes.shape = (80, 1, 5632, 3, 4)
scores.shape = (80, 1, 5632, 3, 1)
ids.shape = (80, 1, 5632, 3, 1)
detections = (80, 1, 5632, 3, 6)
detections = (1, 1351680, 6)
dets.shape = (1, 1351680, 6)
all_detections.shape = 3
all_detections[0].shape = (1, 84480, 6)
result.shape = (1, 1774080, 6)
after result.shape = (1, 1774080, 6)
slice result.shape = (1, 100, 6)
自己總結的計算過程如下:
img ——> darknet53 ——> YOLODetectionBlockV3.body ——> YOLODetectionBlockV3.tip ——> YOLOOutputV3.prediction ——>
(reshape, transpose, slice_axis) ——> [raw_box_centers(2), raw_box_scales(2), objness(1), class_pred(80)] ——>
[box_centers(offsets, _stride), box_scales(anchors), confidence, class_score(class_pred * confidence)] ——>
[bboxes(4), scores(1)] + ids(from arange, 1) ——> (reshape) ——> detections ——> box_nms ——> slice_axis ——>
(ids, scores, bboxes)
class YOLOV3(gluon.HybridBlock): 添加的打印如下:
def hybrid_forward(self, F, x, *args):
    """YOLOV3 network hybrid forward (inference path with debug shape prints).

    Parameters
    ----------
    F : mxnet.nd or mxnet.sym
        `F` is mxnet.sym if hybridized or mxnet.nd if not.
    x : mxnet.nd.NDArray
        Input data.
    *args : optional, mxnet.nd.NDArray
        Not read by the code shown here. The upstream docstring describes
        them as the extra training inputs
        (gt_boxes, obj_t, centers_t, scales_t, weights_t, clas_t)
        generated by YOLOV3PrefetchTargetGenerator in the dataloader
        transform function.

    Returns
    -------
    tuple of mxnet.nd.NDArray
        ``(ids, scores, bboxes)``, sliced from the last axis of the
        concatenated detections, whose 6 columns are
        (cid, score, xmin, ymin, xmax, ymax):
        ``ids`` is column 0, ``scores`` is column 1 and ``bboxes`` is
        columns 2 onward.

    Notes
    -----
    NOTE(review): the ``print(... .shape)`` calls presumably only work when
    the block is NOT hybridized (inputs are NDArrays) -- confirm before
    calling ``hybridize()``.
    """
    all_detections = []
    routes = []
    for stage in self.stages:
        x = stage(x)
        routes.append(x)
        print("x.shape = ", x.shape)

    # Deepest feature map first: the YOLO output layers are used in reverse
    # order, i.e., from very deep layers to shallow.
    reversed_routes = routes[::-1]
    last_index = len(routes) - 1
    for i, block, output in zip(range(len(routes)), self.yolo_blocks, self.yolo_outputs):
        x, tip = block(x)
        dets = output(tip)
        print("dets.shape = ", dets.shape)
        all_detections.append(dets)
        if i >= last_index:
            # The shallowest scale has no further transition/upsample step.
            break
        # add transition layers
        print("self.transitions = ", type(self.transitions))
        x = self.transitions[i](x)
        print("x.shape = ", x.shape)
        # upsample feature map reverse to shallow layers
        upsample = _upsample(x, stride=2)
        print("upsample.shape = ", upsample.shape)
        route_now = reversed_routes[i + 1]
        print("route_now.shape = ", route_now.shape)
        x = F.concat(upsample, route_now, dim=1)
        print("x.shape = ", x.shape)

    # The label says ".shape" but the value printed is the number of scales.
    print("all_detections.shape = ", len(all_detections))
    print("all_detections[0].shape = ", all_detections[0].shape)
    # concat all detection results from different stages
    result = F.concat(*all_detections, dim=1)
    print("result.shape = ", result.shape)
    # apply nms per class
    if 0 < self.nms_thresh < 1:
        result = F.contrib.box_nms(
            result, overlap_thresh=self.nms_thresh, valid_thresh=0.01,
            topk=self.nms_topk, id_index=0, score_index=1, coord_start=2,
            force_suppress=False)
        print("after result.shape = ", result.shape)
        if self.post_nms > 0:
            # keep only the first `post_nms` entries along axis 1
            result = result.slice_axis(axis=1, begin=0, end=self.post_nms)
            print("slice result.shape = ", result.shape)
    ids = result.slice_axis(axis=-1, begin=0, end=1)
    scores = result.slice_axis(axis=-1, begin=1, end=2)
    bboxes = result.slice_axis(axis=-1, begin=2, end=None)
    return ids, scores, bboxes
class YOLOOutputV3(gluon.HybridBlock):添加的打印如下:
class YOLOOutputV3(gluon.HybridBlock):
    """YOLO output layer V3 (annotated with debug prints).

    Parameters
    ----------
    index : int
        Index of the yolo output layer, to avoid naming conflicts only.
    num_class : int
        Number of foreground objects.
    anchors : iterable
        The anchor setting. Reference: https://arxiv.org/pdf/1804.02767.pdf.
    stride : int
        Stride of feature map.
    alloc_size : tuple of int, default is (128, 128)
        For advanced users. Define `alloc_size` to generate large enough anchor
        maps, which will later be saved in parameters. During inference, we
        support arbitrary input images by cropping the corresponding area of
        the anchor map. This allows us to export to symbol so we can run it in
        C++, Scala, etc.

    """

    def __init__(self, index, num_class, anchors, stride,
                 alloc_size=(128, 128), **kwargs):
        super(YOLOOutputV3, self).__init__(**kwargs)
        anchors = np.array(anchors).astype('float32')
        print("anchors = ", anchors)
        self._classes = num_class
        self._num_pred = 1 + 4 + num_class  # 1 objness + 4 box + num_class
        # anchors are flat (w, h) pairs, so two values per anchor
        self._num_anchors = anchors.size // 2
        self._stride = stride
        with self.name_scope():
            print("self._num_pred = ", self._num_pred)
            print("self._num_anchors = ", self._num_anchors)
            all_pred = self._num_pred * self._num_anchors
            print("all_pred = %d" % all_pred)
            self.prediction = nn.Conv2D(all_pred, kernel_size=1, padding=0, strides=1)
            # anchors will be multiplied to predictions
            anchors = anchors.reshape(1, 1, -1, 2)
            print("anchors = ", anchors)
            self.anchors = self.params.get_constant('anchor_%d' % (index), anchors)
            print("self.anchors = ", self.anchors.value)
            # offsets will be added to predictions
            grid_x = np.arange(alloc_size[1])
            grid_y = np.arange(alloc_size[0])
            grid_x, grid_y = np.meshgrid(grid_x, grid_y)
            # stack to (n, n, 2)
            offsets = np.concatenate(
                (grid_x[:, :, np.newaxis], grid_y[:, :, np.newaxis]), axis=-1)
            # expand dims to (1, 1, n, n, 2) so it's easier for broadcasting
            offsets = np.expand_dims(np.expand_dims(offsets, axis=0), axis=0)
            self.offsets = self.params.get_constant('offset_%d' % (index), offsets)

    def hybrid_forward(self, F, x, anchors, offsets):
        """Hybrid forward of the YOLOV3 output layer.

        Parameters
        ----------
        F : mxnet.nd or mxnet.sym
            `F` is mxnet.sym if hybridized or mxnet.nd if not.
        x : mxnet.nd.NDArray
            Input feature map.
        anchors : mxnet.nd.NDArray
            Anchors loaded from self, no need to supply.
        offsets : mxnet.nd.NDArray
            Offsets loaded from self, no need to supply.

        Returns
        -------
        mxnet.nd.NDArray
            Detections reshaped to (B, N, 6), where the last axis is the
            concatenation of (id, score, 4 box corner coordinates). The code
            shown here has no separate training return path.

        Notes
        -----
        NOTE(review): the ``print(... .shape)`` calls presumably only work
        when the block is NOT hybridized -- confirm before ``hybridize()``.
        """
        # prediction flat to (batch, pred per pixel, height * width)
        pred = self.prediction(x)
        print("pred.shape = ", pred.shape)
        pred = pred.reshape((0, self._num_anchors * self._num_pred, -1))
        print("pred.shape = ", pred.shape)
        # transpose to (batch, height * width, num_anchor, num_pred)
        pred = pred.transpose(axes=(0, 2, 1)).reshape(
            (0, -1, self._num_anchors, self._num_pred))
        print("pred.shape = ", pred.shape)
        # components
        raw_box_centers = pred.slice_axis(axis=-1, begin=0, end=2)
        print("raw_box_centers.shape = ", raw_box_centers.shape)
        raw_box_scales = pred.slice_axis(axis=-1, begin=2, end=4)
        print("raw_box_scales.shape = ", raw_box_scales.shape)
        objness = pred.slice_axis(axis=-1, begin=4, end=5)
        print("objness.shape = ", objness.shape)
        class_pred = pred.slice_axis(axis=-1, begin=5, end=None)
        print("class_pred.shape = ", class_pred.shape)

        # valid offsets, (1, 1, height, width, 2)
        print("offsets.shape = ", offsets.shape)
        offsets = F.slice_like(offsets, x * 0, axes=(2, 3))
        print("slice_like offsets.shape = ", offsets.shape)
        # reshape to (1, height*width, 1, 2)
        offsets = offsets.reshape((1, -1, 1, 2))
        print("reshape offsets.shape = ", offsets.shape)

        # centers: sigmoid(raw) + grid offset, scaled by the feature stride
        box_centers = F.broadcast_add(F.sigmoid(raw_box_centers), offsets) * self._stride
        print("box_centers.shape = ", box_centers.shape)
        print("anchors.shape = ", anchors.shape)
        # scales: exp(raw) multiplied by the anchor sizes
        box_scales = F.broadcast_mul(F.exp(raw_box_scales), anchors)
        print("box_scales.shape = ", box_scales.shape)
        confidence = F.sigmoid(objness)
        print("confidence.shape = ", confidence.shape)
        class_score = F.broadcast_mul(F.sigmoid(class_pred), confidence)
        print("class_score.shape = ", class_score.shape)
        # center/size -> corner form: (center - wh/2, center + wh/2)
        wh = box_scales / 2.0
        bbox = F.concat(box_centers - wh, box_centers + wh, dim=-1)
        print("bbox.shape = ", bbox.shape)

        # prediction per class
        bboxes = F.tile(bbox, reps=(self._classes, 1, 1, 1, 1))
        print("bboxes.shape = ", bboxes.shape)
        scores = F.transpose(class_score, axes=(3, 0, 1, 2)).expand_dims(axis=-1)
        print("scores.shape = ", scores.shape)
        # scores * 0 supplies a zero tensor of the right shape to broadcast ids onto
        ids = F.broadcast_add(
            scores * 0, F.arange(0, self._classes).reshape((0, 1, 1, 1, 1)))
        print("ids.shape = ", ids.shape)
        detections = F.concat(ids, scores, bboxes, dim=-1)
        print("detections = ", detections.shape)
        # reshape to (B, xx, 6)
        detections = F.reshape(detections.transpose(axes=(1, 0, 2, 3, 4)), (0, -1, 6))
        print("detections = ", detections.shape)
        return detections
?
總結
以上是生活随笔為你收集整理的yolov3网络结构笔记的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: darknet53的网络结构笔记
- 下一篇: mask rcnn网络结构笔记