當前位置：首頁 > 人工智能 > 目标检测 >内容正文

目标检测

DL之Yolov3：基于深度学习Yolov3算法实现视频目标检测之对《我要打篮球》视频段进行实时目标检测

發布時間：2025/3/21 目标检测 74 豆豆

生活随笔收集整理的這篇文章主要介紹了 DL之Yolov3：基于深度学习Yolov3算法实现视频目标检测之对《我要打篮球》视频段进行实时目标检测小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

DL之Yolov3：基于深度學習Yolov3算法實現視頻目標檢測之對《我要打籃球》視頻段進行實時目標檢測

輸出結果

設計思路

核心代碼

相關文章
成功解決AttributeError: 'NoneType' object has no attribute '__array_interface__'

輸出結果

更新……

設計思路

DL之YoloV3：Yolo V3算法的簡介(論文介紹)、架構詳解、案例應用等配圖集合之詳細攻略

核心代碼

更新……

Yolov3代碼實現的時候，是將輸入的視頻分為幀圖像進行目標檢測的！

1、yolo.py文件

# -*- coding: utf-8 -*- """ Class definition of YOLO_v3 style detection model on image and video """import colorsys import os from timeit import default_timer as timerimport numpy as np from keras import backend as K from keras.models import load_model from keras.layers import Input from PIL import Image, ImageFont, ImageDrawfrom yolo3.model import yolo_eval, yolo_body, tiny_yolo_body from yolo3.utils import letterbox_image import os from keras.utils import multi_gpu_modelclass YOLO(object):_defaults = {"model_path": 'model_data/yolo.h5',"anchors_path": 'model_data/yolo_anchors.txt',"classes_path": 'model_data/coco_classes.txt',"score" : 0.3,"iou" : 0.45,"model_image_size" : (416, 416),"gpu_num" : 1,}@classmethoddef get_defaults(cls, n):if n in cls._defaults:return cls._defaults[n]else:return "Unrecognized attribute name '" + n + "'"def __init__(self, **kwargs):self.__dict__.update(self._defaults) # set up default valuesself.__dict__.update(kwargs) # and update with user overridesself.class_names = self._get_class()self.anchors = self._get_anchors()self.sess = K.get_session()self.boxes, self.scores, self.classes = self.generate()def _get_class(self):classes_path = os.path.expanduser(self.classes_path)with open(classes_path) as f:class_names = f.readlines()class_names = [c.strip() for c in class_names]return class_namesdef _get_anchors(self):anchors_path = os.path.expanduser(self.anchors_path)with open(anchors_path) as f:anchors = f.readline()anchors = [float(x) for x in anchors.split(',')]return np.array(anchors).reshape(-1, 2)def generate(self):model_path = os.path.expanduser(self.model_path)assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'# Load model, or construct model and load weights.num_anchors = len(self.anchors)num_classes = len(self.class_names)is_tiny_version = num_anchors==6 # default settingtry:self.yolo_model = load_model(model_path, compile=False)except:self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes matchelse:assert self.yolo_model.layers[-1].output_shape[-1] == \num_anchors/len(self.yolo_model.output) * (num_classes + 5), \'Mismatch between model and given anchor and class sizes'print('{} model, anchors, and classes loaded.'.format(model_path))# Generate colors for drawing bounding boxes.hsv_tuples = [(x / len(self.class_names), 1., 1.)for x in range(len(self.class_names))]self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),self.colors))np.random.seed(10101) # Fixed seed for consistent colors across runs.np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes.np.random.seed(None) # Reset seed to default.# Generate output tensor targets for filtered bounding boxes.self.input_image_shape = K.placeholder(shape=(2, ))if self.gpu_num>=2:self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,len(self.class_names), self.input_image_shape,score_threshold=self.score, iou_threshold=self.iou)return boxes, scores, classesdef detect_image(self, image):start = timer()if self.model_image_size != (None, None):assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))else:new_image_size = (image.width - (image.width % 32),image.height - (image.height % 32))boxed_image = letterbox_image(image, new_image_size)image_data = np.array(boxed_image, dtype='float32')print(image_data.shape)image_data /= 255.image_data = np.expand_dims(image_data, 0) # Add batch dimension.out_boxes, out_scores, out_classes = self.sess.run([self.boxes, self.scores, self.classes],feed_dict={self.yolo_model.input: image_data,self.input_image_shape: [image.size[1], image.size[0]],K.learning_phase(): 0})print('Found {} boxes for {}'.format(len(out_boxes), 'img'))font = ImageFont.truetype(font='font/FiraMono-Medium.otf',size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))thickness = (image.size[0] + image.size[1]) // 300for i, c in reversed(list(enumerate(out_classes))):predicted_class = self.class_names[c]box = out_boxes[i]score = out_scores[i]label = '{} {:.2f}'.format(predicted_class, score)draw = ImageDraw.Draw(image)label_size = draw.textsize(label, font)top, left, bottom, right = boxtop = max(0, np.floor(top + 0.5).astype('int32'))left = max(0, np.floor(left + 0.5).astype('int32'))bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))right = min(image.size[0], np.floor(right + 0.5).astype('int32'))print(label, (left, top), (right, bottom))if top - label_size[1] >= 0:text_origin = np.array([left, top - label_size[1]])else:text_origin = np.array([left, top + 1])# My kingdom for a good redistributable image drawing library.for i in range(thickness):draw.rectangle([left + i, top + i, right - i, bottom - i],outline=self.colors[c])draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)],fill=self.colors[c])draw.text(text_origin, label, fill=(0, 0, 0), font=font)del drawend = timer()print(end - start)return imagedef close_session(self):self.sess.close()def detect_video(yolo, video_path, output_path=""):import cv2vid = cv2.VideoCapture(video_path)if not vid.isOpened():raise IOError("Couldn't open webcam or video")video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))video_fps = vid.get(cv2.CAP_PROP_FPS)video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))isOutput = True if output_path != "" else Falseif isOutput:print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)accum_time = 0curr_fps = 0fps = "FPS: ??"prev_time = timer()while True:return_value, frame = vid.read()image = Image.fromarray(frame)image = yolo.detect_image(image)result = np.asarray(image)curr_time = timer()exec_time = curr_time - prev_timeprev_time = curr_timeaccum_time = accum_time + exec_timecurr_fps = curr_fps + 1if accum_time > 1:accum_time = accum_time - 1fps = "FPS: " + str(curr_fps)curr_fps = 0cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,fontScale=0.50, color=(255, 0, 0), thickness=2)cv2.namedWindow("result", cv2.WINDOW_NORMAL)cv2.imshow("result", result)if isOutput:out.write(result)if cv2.waitKey(1) & 0xFF == ord('q'):breakyolo.close_session()

2、model.py文件

? ? ? 上邊網絡模型的代碼定義，比如DarknetConv2D、DarknetConv2D_BN_Leaky、resblock_body、darknet_body、make_last_layers、yolo_body、yolo_head、yolo_eval等函數，其中preprocess_true_boxes()函數找出人工標定框GT框，通過計算候選框和GT框的IOU，才會找出best_abchor。
? ? ? ?可以參考網絡模型圖去理解。詳見DL之Yolov3：基于深度學習Yolov3算法實現視頻目標檢測之對《我要打籃球》視頻段進行實時目標檢測?的model.py文件。

"""YOLO_v3 Model Defined in Keras."""from functools import wrapsimport numpy as np import tensorflow as tf from keras import backend as K from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D from keras.layers.advanced_activations import LeakyReLU from keras.layers.normalization import BatchNormalization from keras.models import Model from keras.regularizers import l2from yolo3.utils import compose@wraps(Conv2D) def DarknetConv2D(*args, **kwargs):"""Wrapper to set Darknet parameters for Convolution2D."""darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'darknet_conv_kwargs.update(kwargs)return Conv2D(*args, **darknet_conv_kwargs)def DarknetConv2D_BN_Leaky(*args, **kwargs):"""Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""no_bias_kwargs = {'use_bias': False}no_bias_kwargs.update(kwargs)return compose(DarknetConv2D(*args, **no_bias_kwargs),BatchNormalization(),LeakyReLU(alpha=0.1))def resblock_body(x, num_filters, num_blocks):'''A series of resblocks starting with a downsampling Convolution2D'''# Darknet uses left and top padding instead of 'same' modex = ZeroPadding2D(((1,0),(1,0)))(x)x = DarknetConv2D_BN_Leaky(num_filters, (3,3), strides=(2,2))(x)for i in range(num_blocks):y = compose(DarknetConv2D_BN_Leaky(num_filters//2, (1,1)),DarknetConv2D_BN_Leaky(num_filters, (3,3)))(x)x = Add()([x,y])return xdef darknet_body(x):'''Darknent body having 52 Convolution2D layers'''x = DarknetConv2D_BN_Leaky(32, (3,3))(x)x = resblock_body(x, 64, 1)x = resblock_body(x, 128, 2)x = resblock_body(x, 256, 8)x = resblock_body(x, 512, 8)x = resblock_body(x, 1024, 4)return xdef make_last_layers(x, num_filters, out_filters):'''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer'''x = compose(DarknetConv2D_BN_Leaky(num_filters, (1,1)),DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),DarknetConv2D_BN_Leaky(num_filters, (1,1)),DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),DarknetConv2D_BN_Leaky(num_filters, (1,1)))(x)y = compose(DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),DarknetConv2D(out_filters, (1,1)))(x)return x, ydef yolo_body(inputs, num_anchors, num_classes):"""Create YOLO_V3 model CNN body in Keras."""darknet = Model(inputs, darknet_body(inputs))x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5))x = compose(DarknetConv2D_BN_Leaky(256, (1,1)),UpSampling2D(2))(x)x = Concatenate()([x,darknet.layers[152].output])x, y2 = make_last_layers(x, 256, num_anchors*(num_classes+5))x = compose(DarknetConv2D_BN_Leaky(128, (1,1)),UpSampling2D(2))(x)x = Concatenate()([x,darknet.layers[92].output])x, y3 = make_last_layers(x, 128, num_anchors*(num_classes+5))return Model(inputs, [y1,y2,y3])def tiny_yolo_body(inputs, num_anchors, num_classes):'''Create Tiny YOLO_v3 model CNN body in keras.'''x1 = compose(DarknetConv2D_BN_Leaky(16, (3,3)),MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),DarknetConv2D_BN_Leaky(32, (3,3)),MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),DarknetConv2D_BN_Leaky(64, (3,3)),MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),DarknetConv2D_BN_Leaky(128, (3,3)),MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),DarknetConv2D_BN_Leaky(256, (3,3)))(inputs)x2 = compose(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),DarknetConv2D_BN_Leaky(512, (3,3)),MaxPooling2D(pool_size=(2,2), strides=(1,1), padding='same'),DarknetConv2D_BN_Leaky(1024, (3,3)),DarknetConv2D_BN_Leaky(256, (1,1)))(x1)y1 = compose(DarknetConv2D_BN_Leaky(512, (3,3)),DarknetConv2D(num_anchors*(num_classes+5), (1,1)))(x2)x2 = compose(DarknetConv2D_BN_Leaky(128, (1,1)),UpSampling2D(2))(x2)y2 = compose(Concatenate(),DarknetConv2D_BN_Leaky(256, (3,3)),DarknetConv2D(num_anchors*(num_classes+5), (1,1)))([x2,x1])return Model(inputs, [y1,y2])def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):"""Convert final layer features to bounding box parameters."""num_anchors = len(anchors)# Reshape to batch, height, width, num_anchors, box_params.anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])grid_shape = K.shape(feats)[1:3] # height, widthgrid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),[1, grid_shape[1], 1, 1])grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),[grid_shape[0], 1, 1, 1])grid = K.concatenate([grid_x, grid_y])grid = K.cast(grid, K.dtype(feats))feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])# Adjust preditions to each spatial grid point and anchor size.box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))box_confidence = K.sigmoid(feats[..., 4:5])box_class_probs = K.sigmoid(feats[..., 5:])if calc_loss == True:return grid, feats, box_xy, box_whreturn box_xy, box_wh, box_confidence, box_class_probsdef yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):'''Get corrected boxes'''box_yx = box_xy[..., ::-1]box_hw = box_wh[..., ::-1]input_shape = K.cast(input_shape, K.dtype(box_yx))image_shape = K.cast(image_shape, K.dtype(box_yx))new_shape = K.round(image_shape * K.min(input_shape/image_shape))offset = (input_shape-new_shape)/2./input_shapescale = input_shape/new_shapebox_yx = (box_yx - offset) * scalebox_hw *= scalebox_mins = box_yx - (box_hw / 2.)box_maxes = box_yx + (box_hw / 2.)boxes = K.concatenate([box_mins[..., 0:1], # y_minbox_mins[..., 1:2], # x_minbox_maxes[..., 0:1], # y_maxbox_maxes[..., 1:2] # x_max])# Scale boxes back to original image shape.boxes *= K.concatenate([image_shape, image_shape])return boxesdef yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):'''Process Conv layer output'''box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,anchors, num_classes, input_shape)boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)boxes = K.reshape(boxes, [-1, 4])box_scores = box_confidence * box_class_probsbox_scores = K.reshape(box_scores, [-1, num_classes])return boxes, box_scoresdef yolo_eval(yolo_outputs,anchors,num_classes,image_shape,max_boxes=20,score_threshold=.6,iou_threshold=.5):"""Evaluate YOLO model on given input and return filtered boxes."""num_layers = len(yolo_outputs)anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] # default settinginput_shape = K.shape(yolo_outputs[0])[1:3] * 32boxes = []box_scores = []for l in range(num_layers):_boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],anchors[anchor_mask[l]], num_classes, input_shape, image_shape)boxes.append(_boxes)box_scores.append(_box_scores)boxes = K.concatenate(boxes, axis=0)box_scores = K.concatenate(box_scores, axis=0)mask = box_scores >= score_thresholdmax_boxes_tensor = K.constant(max_boxes, dtype='int32')boxes_ = []scores_ = []classes_ = []for c in range(num_classes):# TODO: use keras backend instead of tf.class_boxes = tf.boolean_mask(boxes, mask[:, c])class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])nms_index = tf.image.non_max_suppression(class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)class_boxes = K.gather(class_boxes, nms_index)class_box_scores = K.gather(class_box_scores, nms_index)classes = K.ones_like(class_box_scores, 'int32') * cboxes_.append(class_boxes)scores_.append(class_box_scores)classes_.append(classes)boxes_ = K.concatenate(boxes_, axis=0)scores_ = K.concatenate(scores_, axis=0)classes_ = K.concatenate(classes_, axis=0)return boxes_, scores_, classes_def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):'''Preprocess true boxes to training input formatParameters----------true_boxes: array, shape=(m, T, 5)Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.input_shape: array-like, hw, multiples of 32anchors: array, shape=(N, 2), whnum_classes: integerReturns-------y_true: list of array, shape like yolo_outputs, xywh are reletive value'''assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'num_layers = len(anchors)//3 # default settinganchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]true_boxes = np.array(true_boxes, dtype='float32')input_shape = np.array(input_shape, dtype='int32')boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]m = true_boxes.shape[0]grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(num_layers)]y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes),dtype='float32') for l in range(num_layers)]# Expand dim to apply broadcasting.anchors = np.expand_dims(anchors, 0)anchor_maxes = anchors / 2.anchor_mins = -anchor_maxesvalid_mask = boxes_wh[..., 0]>0for b in range(m):# Discard zero rows.wh = boxes_wh[b, valid_mask[b]]if len(wh)==0: continue# Expand dim to apply broadcasting.wh = np.expand_dims(wh, -2)box_maxes = wh / 2.box_mins = -box_maxesintersect_mins = np.maximum(box_mins, anchor_mins)intersect_maxes = np.minimum(box_maxes, anchor_maxes)intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]box_area = wh[..., 0] * wh[..., 1]anchor_area = anchors[..., 0] * anchors[..., 1]iou = intersect_area / (box_area + anchor_area - intersect_area)# Find best anchor for each true boxbest_anchor = np.argmax(iou, axis=-1)for t, n in enumerate(best_anchor):for l in range(num_layers):if n in anchor_mask[l]:i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')k = anchor_mask[l].index(n)c = true_boxes[b,t, 4].astype('int32')y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]y_true[l][b, j, i, k, 4] = 1y_true[l][b, j, i, k, 5+c] = 1return y_truedef box_iou(b1, b2):'''Return iou tensorParameters----------b1: tensor, shape=(i1,...,iN, 4), xywhb2: tensor, shape=(j, 4), xywhReturns-------iou: tensor, shape=(i1,...,iN, j)'''# Expand dim to apply broadcasting.b1 = K.expand_dims(b1, -2)b1_xy = b1[..., :2]b1_wh = b1[..., 2:4]b1_wh_half = b1_wh/2.b1_mins = b1_xy - b1_wh_halfb1_maxes = b1_xy + b1_wh_half# Expand dim to apply broadcasting.b2 = K.expand_dims(b2, 0)b2_xy = b2[..., :2]b2_wh = b2[..., 2:4]b2_wh_half = b2_wh/2.b2_mins = b2_xy - b2_wh_halfb2_maxes = b2_xy + b2_wh_halfintersect_mins = K.maximum(b1_mins, b2_mins)intersect_maxes = K.minimum(b1_maxes, b2_maxes)intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]b1_area = b1_wh[..., 0] * b1_wh[..., 1]b2_area = b2_wh[..., 0] * b2_wh[..., 1]iou = intersect_area / (b1_area + b2_area - intersect_area)return ioudef yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):'''Return yolo_loss tensorParameters----------yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_bodyy_true: list of array, the output of preprocess_true_boxesanchors: array, shape=(N, 2), whnum_classes: integerignore_thresh: float, the iou threshold whether to ignore object confidence lossReturns-------loss: tensor, shape=(1,)'''num_layers = len(anchors)//3 # default settingyolo_outputs = args[:num_layers]y_true = args[num_layers:]anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]loss = 0m = K.shape(yolo_outputs[0])[0] # batch size, tensormf = K.cast(m, K.dtype(yolo_outputs[0]))for l in range(num_layers):object_mask = y_true[l][..., 4:5]true_class_probs = y_true[l][..., 5:]grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)pred_box = K.concatenate([pred_xy, pred_wh])# Darknet raw box to calculate loss.raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - gridraw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-infbox_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]# Find ignore mask, iterate over each of batch.ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)object_mask_bool = K.cast(object_mask, 'bool')def loop_body(b, ignore_mask):true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])iou = box_iou(pred_box[b], true_box)best_iou = K.max(iou, axis=-1)ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))return b+1, ignore_mask_, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])ignore_mask = ignore_mask.stack()ignore_mask = K.expand_dims(ignore_mask, -1)# K.binary_crossentropy is helpful to avoid exp overflow.xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \(1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_maskclass_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)xy_loss = K.sum(xy_loss) / mfwh_loss = K.sum(wh_loss) / mfconfidence_loss = K.sum(confidence_loss) / mfclass_loss = K.sum(class_loss) / mfloss += xy_loss + wh_loss + confidence_loss + class_lossif print_loss:loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message='loss: ')return loss

3、訓練train.py文件

圖像尺寸也是416*416，訓練過程中，也可以保存權重。

""" Retrain the YOLO model for your own dataset. """import numpy as np import keras.backend as K from keras.layers import Input, Lambda from keras.models import Model from keras.optimizers import Adam from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStoppingfrom yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss from yolo3.utils import get_random_datadef _main():annotation_path = 'train.txt'log_dir = 'logs/000/'classes_path = 'model_data/voc_classes.txt'anchors_path = 'model_data/yolo_anchors.txt'class_names = get_classes(classes_path)num_classes = len(class_names)anchors = get_anchors(anchors_path)input_shape = (416,416) # multiple of 32, hwis_tiny_version = len(anchors)==6 # default settingif is_tiny_version:model = create_tiny_model(input_shape, anchors, num_classes,freeze_body=2, weights_path='model_data/tiny_yolo_weights.h5')else:model = create_model(input_shape, anchors, num_classes,freeze_body=2, weights_path='model_data/yolo_weights.h5') # make sure you know what you freezelogging = TensorBoard(log_dir=log_dir)checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',monitor='val_loss', save_weights_only=True, save_best_only=True, period=3)reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)val_split = 0.1with open(annotation_path) as f:lines = f.readlines()np.random.seed(10101)np.random.shuffle(lines)np.random.seed(None)num_val = int(len(lines)*val_split)num_train = len(lines) - num_val# Train with frozen layers first, to get a stable loss.# Adjust num epochs to your dataset. This step is enough to obtain a not bad model.if True:model.compile(optimizer=Adam(lr=1e-3), loss={# use custom yolo_loss Lambda layer.'yolo_loss': lambda y_true, y_pred: y_pred})batch_size = 32print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),steps_per_epoch=max(1, num_train//batch_size),validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),validation_steps=max(1, num_val//batch_size),epochs=50,initial_epoch=0,callbacks=[logging, checkpoint])model.save_weights(log_dir + 'trained_weights_stage_1.h5')# Unfreeze and continue training, to fine-tune.# Train longer if the result is not good.if True:for i in range(len(model.layers)):model.layers[i].trainable = Truemodel.compile(optimizer=Adam(lr=1e-4), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the changeprint('Unfreeze all of the layers.')batch_size = 32 # note that more GPU memory is required after unfreezing the bodyprint('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),steps_per_epoch=max(1, num_train//batch_size),validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),validation_steps=max(1, num_val//batch_size),epochs=100,initial_epoch=50,callbacks=[logging, checkpoint, reduce_lr, early_stopping])model.save_weights(log_dir + 'trained_weights_final.h5')# Further training if needed.def get_classes(classes_path):'''loads the classes'''with open(classes_path) as f:class_names = f.readlines()class_names = [c.strip() for c in class_names]return class_namesdef get_anchors(anchors_path):'''loads the anchors from a file'''with open(anchors_path) as f:anchors = f.readline()anchors = [float(x) for x in anchors.split(',')]return np.array(anchors).reshape(-1, 2)def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,weights_path='model_data/yolo_weights.h5'):'''create the training model'''K.clear_session() # get a new sessionimage_input = Input(shape=(None, None, 3))h, w = input_shapenum_anchors = len(anchors)y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \num_anchors//3, num_classes+5)) for l in range(3)]model_body = yolo_body(image_input, num_anchors//3, num_classes)print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))if load_pretrained:model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)print('Load weights {}.'.format(weights_path))if freeze_body in [1, 2]:# Freeze darknet53 body or freeze all but 3 output layers.num = (185, len(model_body.layers)-3)[freeze_body-1]for i in range(num): model_body.layers[i].trainable = Falseprint('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})([*model_body.output, *y_true])model = Model([model_body.input, *y_true], model_loss)return modeldef create_tiny_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,weights_path='model_data/tiny_yolo_weights.h5'):'''create the training model, for Tiny YOLOv3'''K.clear_session() # get a new sessionimage_input = Input(shape=(None, None, 3))h, w = input_shapenum_anchors = len(anchors)y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], \num_anchors//2, num_classes+5)) for l in range(2)]model_body = tiny_yolo_body(image_input, num_anchors//2, num_classes)print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))if load_pretrained:model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)print('Load weights {}.'.format(weights_path))if freeze_body in [1, 2]:# Freeze the darknet body or freeze all but 2 output layers.num = (20, len(model_body.layers)-2)[freeze_body-1]for i in range(num): model_body.layers[i].trainable = Falseprint('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7})([*model_body.output, *y_true])model = Model([model_body.input, *y_true], model_loss)return modeldef data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):'''data generator for fit_generator'''n = len(annotation_lines)i = 0while True:image_data = []box_data = []for b in range(batch_size):if i==0:np.random.shuffle(annotation_lines)image, box = get_random_data(annotation_lines[i], input_shape, random=True)image_data.append(image)box_data.append(box)i = (i+1) % nimage_data = np.array(image_data)box_data = np.array(box_data)y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)yield [image_data, *y_true], np.zeros(batch_size)def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes):n = len(annotation_lines)if n==0 or batch_size<=0: return Nonereturn data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)if __name__ == '__main__':_main()

總結

以上是生活随笔為你收集整理的DL之Yolov3：基于深度学习Yolov3算法实现视频目标检测之对《我要打篮球》视频段进行实时目标检测的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： Paper：2017年的Google机器
下一篇：成功解决AttributeError: