DL之Yolov3:基于深度学习Yolov3算法实现视频目标检测之对《我要打篮球》视频段进行实时目标检测
DL之Yolov3:基于深度學習Yolov3算法實現視頻目標檢測之對《我要打籃球》視頻段進行實時目標檢測
?
?
?
?
目錄
輸出結果
設計思路
核心代碼
?
?
?
?
相關文章
成功解決AttributeError: 'NoneType' object has no attribute '__array_interface__'
輸出結果
更新……
?
?
?
?
設計思路
DL之YoloV3:Yolo V3算法的簡介(論文介紹)、架構詳解、案例應用等配圖集合之詳細攻略
?
?
?
?
核心代碼
更新……
Yolov3代碼實現的時候,是將輸入的視頻分為幀圖像進行目標檢測的!
1、yolo.py文件
# -*- coding: utf-8 -*- """ Class definition of YOLO_v3 style detection model on image and video """import colorsys import os from timeit import default_timer as timerimport numpy as np from keras import backend as K from keras.models import load_model from keras.layers import Input from PIL import Image, ImageFont, ImageDrawfrom yolo3.model import yolo_eval, yolo_body, tiny_yolo_body from yolo3.utils import letterbox_image import os from keras.utils import multi_gpu_modelclass YOLO(object):_defaults = {"model_path": 'model_data/yolo.h5',"anchors_path": 'model_data/yolo_anchors.txt',"classes_path": 'model_data/coco_classes.txt',"score" : 0.3,"iou" : 0.45,"model_image_size" : (416, 416),"gpu_num" : 1,}@classmethoddef get_defaults(cls, n):if n in cls._defaults:return cls._defaults[n]else:return "Unrecognized attribute name '" + n + "'"def __init__(self, **kwargs):self.__dict__.update(self._defaults) # set up default valuesself.__dict__.update(kwargs) # and update with user overridesself.class_names = self._get_class()self.anchors = self._get_anchors()self.sess = K.get_session()self.boxes, self.scores, self.classes = self.generate()def _get_class(self):classes_path = os.path.expanduser(self.classes_path)with open(classes_path) as f:class_names = f.readlines()class_names = [c.strip() for c in class_names]return class_namesdef _get_anchors(self):anchors_path = os.path.expanduser(self.anchors_path)with open(anchors_path) as f:anchors = f.readline()anchors = [float(x) for x in anchors.split(',')]return np.array(anchors).reshape(-1, 2)def generate(self):model_path = os.path.expanduser(self.model_path)assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'# Load model, or construct model and load weights.num_anchors = len(self.anchors)num_classes = len(self.class_names)is_tiny_version = num_anchors==6 # default settingtry:self.yolo_model = load_model(model_path, compile=False)except:self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes matchelse:assert self.yolo_model.layers[-1].output_shape[-1] == \num_anchors/len(self.yolo_model.output) * (num_classes + 5), \'Mismatch between model and given anchor and class sizes'print('{} model, anchors, and classes loaded.'.format(model_path))# Generate colors for drawing bounding boxes.hsv_tuples = [(x / len(self.class_names), 1., 1.)for x in range(len(self.class_names))]self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),self.colors))np.random.seed(10101) # Fixed seed for consistent colors across runs.np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes.np.random.seed(None) # Reset seed to default.# Generate output tensor targets for filtered bounding boxes.self.input_image_shape = K.placeholder(shape=(2, ))if self.gpu_num>=2:self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,len(self.class_names), self.input_image_shape,score_threshold=self.score, iou_threshold=self.iou)return boxes, scores, classesdef detect_image(self, image):start = timer()if self.model_image_size != (None, None):assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))else:new_image_size = (image.width - (image.width % 32),image.height - (image.height % 32))boxed_image = letterbox_image(image, new_image_size)image_data = np.array(boxed_image, dtype='float32')print(image_data.shape)image_data /= 255.image_data = np.expand_dims(image_data, 0) # Add batch dimension.out_boxes, out_scores, out_classes = self.sess.run([self.boxes, self.scores, self.classes],feed_dict={self.yolo_model.input: image_data,self.input_image_shape: [image.size[1], image.size[0]],K.learning_phase(): 0})print('Found {} boxes for {}'.format(len(out_boxes), 'img'))font = ImageFont.truetype(font='font/FiraMono-Medium.otf',size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))thickness = (image.size[0] + image.size[1]) // 300for i, c in reversed(list(enumerate(out_classes))):predicted_class = self.class_names[c]box = out_boxes[i]score = out_scores[i]label = '{} {:.2f}'.format(predicted_class, score)draw = ImageDraw.Draw(image)label_size = draw.textsize(label, font)top, left, bottom, right = boxtop = max(0, np.floor(top + 0.5).astype('int32'))left = max(0, np.floor(left + 0.5).astype('int32'))bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))right = min(image.size[0], np.floor(right + 0.5).astype('int32'))print(label, (left, top), (right, bottom))if top - label_size[1] >= 0:text_origin = np.array([left, top - label_size[1]])else:text_origin = np.array([left, top + 1])# My kingdom for a good redistributable image drawing library.for i in range(thickness):draw.rectangle([left + i, top + i, right - i, bottom - i],outline=self.colors[c])draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)],fill=self.colors[c])draw.text(text_origin, label, fill=(0, 0, 0), font=font)del drawend = timer()print(end - start)return imagedef close_session(self):self.sess.close()def detect_video(yolo, video_path, output_path=""):import cv2vid = cv2.VideoCapture(video_path)if not vid.isOpened():raise IOError("Couldn't open webcam or video")video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))video_fps = vid.get(cv2.CAP_PROP_FPS)video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))isOutput = True if output_path != "" else Falseif isOutput:print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)accum_time = 0curr_fps = 0fps = "FPS: ??"prev_time = timer()while True:return_value, frame = vid.read()image = Image.fromarray(frame)image = yolo.detect_image(image)result = np.asarray(image)curr_time = timer()exec_time = curr_time - prev_timeprev_time = curr_timeaccum_time = accum_time + exec_timecurr_fps = curr_fps + 1if accum_time > 1:accum_time = accum_time - 1fps = "FPS: " + str(curr_fps)curr_fps = 0cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,fontScale=0.50, color=(255, 0, 0), thickness=2)cv2.namedWindow("result", cv2.WINDOW_NORMAL)cv2.imshow("result", result)if isOutput:out.write(result)if cv2.waitKey(1) & 0xFF == ord('q'):breakyolo.close_session()2、model.py文件
? ? ? 上邊網絡模型的代碼定義,比如DarknetConv2D、DarknetConv2D_BN_Leaky、resblock_body、darknet_body、make_last_layers、yolo_body、yolo_head、yolo_eval等函數,其中preprocess_true_boxes()函數找出人工標定框GT框,通過計算候選框和GT框的IOU,才會找出best_abchor。
? ? ? ?可以參考網絡模型圖去理解。詳見DL之Yolov3:基于深度學習Yolov3算法實現視頻目標檢測之對《我要打籃球》視頻段進行實時目標檢測?的model.py文件。
3、訓練train.py文件
圖像尺寸也是416*416,訓練過程中,也可以保存權重。
""" Retrain the YOLO model for your own dataset. """import numpy as np import keras.backend as K from keras.layers import Input, Lambda from keras.models import Model from keras.optimizers import Adam from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStoppingfrom yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss from yolo3.utils import get_random_datadef _main():annotation_path = 'train.txt'log_dir = 'logs/000/'classes_path = 'model_data/voc_classes.txt'anchors_path = 'model_data/yolo_anchors.txt'class_names = get_classes(classes_path)num_classes = len(class_names)anchors = get_anchors(anchors_path)input_shape = (416,416) # multiple of 32, hwis_tiny_version = len(anchors)==6 # default settingif is_tiny_version:model = create_tiny_model(input_shape, anchors, num_classes,freeze_body=2, weights_path='model_data/tiny_yolo_weights.h5')else:model = create_model(input_shape, anchors, num_classes,freeze_body=2, weights_path='model_data/yolo_weights.h5') # make sure you know what you freezelogging = TensorBoard(log_dir=log_dir)checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',monitor='val_loss', save_weights_only=True, save_best_only=True, period=3)reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)val_split = 0.1with open(annotation_path) as f:lines = f.readlines()np.random.seed(10101)np.random.shuffle(lines)np.random.seed(None)num_val = int(len(lines)*val_split)num_train = len(lines) - num_val# Train with frozen layers first, to get a stable loss.# Adjust num epochs to your dataset. This step is enough to obtain a not bad model.if True:model.compile(optimizer=Adam(lr=1e-3), loss={# use custom yolo_loss Lambda layer.'yolo_loss': lambda y_true, y_pred: y_pred})batch_size = 32print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),steps_per_epoch=max(1, num_train//batch_size),validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),validation_steps=max(1, num_val//batch_size),epochs=50,initial_epoch=0,callbacks=[logging, checkpoint])model.save_weights(log_dir + 'trained_weights_stage_1.h5')# Unfreeze and continue training, to fine-tune.# Train longer if the result is not good.if True:for i in range(len(model.layers)):model.layers[i].trainable = Truemodel.compile(optimizer=Adam(lr=1e-4), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the changeprint('Unfreeze all of the layers.')batch_size = 32 # note that more GPU memory is required after unfreezing the bodyprint('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),steps_per_epoch=max(1, num_train//batch_size),validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),validation_steps=max(1, num_val//batch_size),epochs=100,initial_epoch=50,callbacks=[logging, checkpoint, reduce_lr, early_stopping])model.save_weights(log_dir + 'trained_weights_final.h5')# Further training if needed.def get_classes(classes_path):'''loads the classes'''with open(classes_path) as f:class_names = f.readlines()class_names = [c.strip() for c in class_names]return class_namesdef get_anchors(anchors_path):'''loads the anchors from a file'''with open(anchors_path) as f:anchors = f.readline()anchors = [float(x) for x in anchors.split(',')]return np.array(anchors).reshape(-1, 2)def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,weights_path='model_data/yolo_weights.h5'):'''create the training model'''K.clear_session() # get a new sessionimage_input = Input(shape=(None, None, 3))h, w = input_shapenum_anchors = len(anchors)y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \num_anchors//3, num_classes+5)) for l in range(3)]model_body = yolo_body(image_input, num_anchors//3, num_classes)print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))if load_pretrained:model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)print('Load weights {}.'.format(weights_path))if freeze_body in [1, 2]:# Freeze darknet53 body or freeze all but 3 output layers.num = (185, len(model_body.layers)-3)[freeze_body-1]for i in range(num): model_body.layers[i].trainable = Falseprint('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})([*model_body.output, *y_true])model = Model([model_body.input, *y_true], model_loss)return modeldef create_tiny_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,weights_path='model_data/tiny_yolo_weights.h5'):'''create the training model, for Tiny YOLOv3'''K.clear_session() # get a new sessionimage_input = Input(shape=(None, None, 3))h, w = input_shapenum_anchors = len(anchors)y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], \num_anchors//2, num_classes+5)) for l in range(2)]model_body = tiny_yolo_body(image_input, num_anchors//2, num_classes)print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))if load_pretrained:model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)print('Load weights {}.'.format(weights_path))if freeze_body in [1, 2]:# Freeze the darknet body or freeze all but 2 output layers.num = (20, len(model_body.layers)-2)[freeze_body-1]for i in range(num): model_body.layers[i].trainable = Falseprint('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7})([*model_body.output, *y_true])model = Model([model_body.input, *y_true], model_loss)return modeldef data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):'''data generator for fit_generator'''n = len(annotation_lines)i = 0while True:image_data = []box_data = []for b in range(batch_size):if i==0:np.random.shuffle(annotation_lines)image, box = get_random_data(annotation_lines[i], input_shape, random=True)image_data.append(image)box_data.append(box)i = (i+1) % nimage_data = np.array(image_data)box_data = np.array(box_data)y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)yield [image_data, *y_true], np.zeros(batch_size)def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes):n = len(annotation_lines)if n==0 or batch_size<=0: return Nonereturn data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)if __name__ == '__main__':_main()?
?
?
?
?
?
?
?
?
?
?
?
?
?
總結
以上是生活随笔為你收集整理的DL之Yolov3:基于深度学习Yolov3算法实现视频目标检测之对《我要打篮球》视频段进行实时目标检测的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Paper:2017年的Google机器
- 下一篇: 成功解决AttributeError: