當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

将自己手动标注的数据集(PascalVOC格式)转化为.TFRecord格式

發布時間：2023/12/20 编程问答 25 豆豆

生活随笔收集整理的這篇文章主要介紹了将自己手动标注的数据集(PascalVOC格式)转化为.TFRecord格式，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

“ 一個人如果不能學會遺忘，那將是很痛苦的事，別再自尋煩惱，快把痛苦的事給忘了吧！”

為了能夠使用Object Detection API~

需要將數據集格式轉化為.TFRecord再進行訓練~

至于，

如何使用Tensorflow官方的Object Detection API

包括下載、依賴（protobuf等）安裝、跑demo、訓練自己的數據過程~

推薦一篇博文：

1. https://blog.csdn.net/rookie_wei/article/details/81143814

2. https://blog.csdn.net/rookie_wei/article/details/81210499

3. https://blog.csdn.net/rookie_wei/article/details/81275663

整個過程比較詳細，可以參考~

本篇主要介紹如何將已標注好的數據集轉化成Tensorflow通用的.TFRecord格式~

注意：本程序是我自己檢測的6類object，請根據情況修改！

#-*- coding=utf-8 -*-
# File Name: Create_TFRecord.py
# Author: HZ
# Created Time: 2018-06-06
"""Convert a hand-labelled Pascal VOC style dataset into a TFRecord file.

The dataset directory is expected to contain an ``Annotations/`` folder with
one XML file per image and a ``JPEGImages/`` folder with the matching
``.jpg`` files.
"""
import os
import sys
import random

import numpy as np
import tensorflow as tf
import xml.etree.ElementTree as ET  # XML annotation parsing

# Label table: annotation name -> (integer class id, display name).
# Six entries here; adjust to match the classes of your own dataset.
VOC_LABELS = {
    'none': (0, 'Background'),
    'person': (1, 'Person'),
    'car': (2, 'Car'),
    'bus': (3, 'Bus'),
    'truck': (4, 'Truck'),
    'cyclist': (5, 'cyclist'),
}

# Sub-folders (relative to the dataset directory) holding labels and images.
DIRECTORY_ANNOTATIONS = 'Annotations/'
DIRECTORY_IMAGES = 'JPEGImages/'

# Seed used when the file list is shuffled.
RANDOM_SEED = 4242


# Helpers that wrap a value (or list of values) in a tf.train.Feature.
def int64_feature(value):
    """Return an int64 ``tf.train.Feature``; a non-list value is wrapped in a list."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def float_feature(value):
    """Return a float ``tf.train.Feature``; a non-list value is wrapped in a list."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def bytes_feature(value):
    """Return a bytes ``tf.train.Feature``; a non-list value is wrapped in a list."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))


def _read_flag(obj, tag):
    """Return the integer text of child ``tag`` of ``obj``, or 0 if it is absent."""
    node = obj.find(tag)
    # Compare against None explicitly: an Element without children is falsy,
    # so a plain truthiness test would treat every present flag as missing.
    if node is None:
        return 0
    return int(node.text)


def _process_image(directory, name):
    """Load one image and its XML annotation.

    Args:
        directory: dataset root containing the image and annotation folders.
        name: file name without extension, shared by the .jpg and .xml files.

    Returns:
        A tuple ``(image_data, shape, bboxes, labels, labels_text, difficult,
        truncated)`` where ``image_data`` is the raw JPEG bytes, ``shape`` is
        ``[height, width, depth]`` read from the XML, ``bboxes`` holds
        ``(ymin, xmin, ymax, xmax)`` tuples divided by the image height/width,
        and the remaining lists hold one entry per kept object.

    Raises:
        KeyError: if a kept object's name is not a key of ``VOC_LABELS``.
    """
    # Read the encoded image; the context manager closes the handle.
    image_path = os.path.join(directory, DIRECTORY_IMAGES, name + '.jpg')
    with tf.gfile.FastGFile(image_path, 'rb') as image_file:
        image_data = image_file.read()

    # Read the XML annotation file.
    annotation_path = os.path.join(directory, DIRECTORY_ANNOTATIONS, name + '.xml')
    root = ET.parse(annotation_path).getroot()

    # Image shape as stored in the annotation.
    size = root.find('size')
    shape = [int(size.find('height').text),
             int(size.find('width').text),
             int(size.find('depth').text)]

    bboxes = []
    labels = []
    labels_text = []
    difficult = []
    truncated = []
    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        ymin = float(bbox.find('ymin').text) / shape[0]
        xmin = float(bbox.find('xmin').text) / shape[1]
        ymax = float(bbox.find('ymax').text) / shape[0]
        xmax = float(bbox.find('xmax').text) / shape[1]

        # Keep only boxes whose normalised extent is below 1 on both axes.
        # The whole object is skipped (not just its box) so that the label and
        # flag lists stay index-aligned with the box list.
        # NOTE(review): this also rejects boxes spanning the full image
        # (extent == 1) - confirm that is intended.
        if not (abs(ymax - ymin) < 1 and abs(xmax - xmin) < 1):
            continue

        label = obj.find('name').text
        labels.append(int(VOC_LABELS[label][0]))
        labels_text.append(label.encode('ascii'))  # stored as ASCII bytes
        difficult.append(_read_flag(obj, 'difficult'))
        truncated.append(_read_flag(obj, 'truncated'))
        bboxes.append((ymin, xmin, ymax, xmax))
    return image_data, shape, bboxes, labels, labels_text, difficult, truncated


def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Build a ``tf.train.Example`` from the data of one image.

    Args:
        image_data: encoded JPEG bytes.
        labels: integer class id per object.
        labels_text: class name (bytes) per object.
        bboxes: ``(ymin, xmin, ymax, xmax)`` tuple per object.
        shape: ``[height, width, channels]`` of the image.
        difficult: integer flag per object.
        truncated: integer flag per object.

    Returns:
        The populated ``tf.train.Example``.

    Raises:
        ValueError: if a bounding box does not have exactly four coordinates.
    """
    ymin = []
    xmin = []
    ymax = []
    xmax = []
    for box in bboxes:
        if len(box) != 4:
            raise ValueError('bounding box must have 4 coordinates: %r' % (box,))
        ymin.append(box[0])
        xmin.append(box[1])
        ymax.append(box[2])
        xmax.append(box[3])

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example


def _add_to_tfrecord(dataset_dir, name, tfrecord_writer):
    """Process image ``name`` from ``dataset_dir`` and write it to ``tfrecord_writer``."""
    image_data, shape, bboxes, labels, labels_text, difficult, truncated = \
        _process_image(dataset_dir, name)
    example = _convert_to_example(image_data, labels, labels_text,
                                  bboxes, shape, difficult, truncated)
    tfrecord_writer.write(example.SerializeToString())


def _get_output_filename(output_dir, name, idx):
    """Return the output path ``<output_dir>/<name>_<idx, 3 digits>.tfrecord``."""
    return '%s/%s_%03d.tfrecord' % (output_dir, name, idx)


def run(dataset_dir, output_dir, name='voc_train', shuffling=False):
    """Convert every annotated image under ``dataset_dir`` into one TFRecord file.

    Args:
        dataset_dir: dataset root containing the annotation and image folders.
        output_dir: folder receiving the TFRecord file; created if missing.
        name: prefix of the generated file name.
        shuffling: when True, shuffle the file order with ``RANDOM_SEED``.
    """
    # The writer needs the *output* folder to exist; creating a missing
    # dataset folder would only produce an empty dataset.
    if not tf.gfile.Exists(output_dir):
        tf.gfile.MakeDirs(output_dir)

    path = os.path.join(dataset_dir, DIRECTORY_ANNOTATIONS)
    filenames = sorted(os.listdir(path))  # deterministic order
    if shuffling:
        random.seed(RANDOM_SEED)
        random.shuffle(filenames)

    total = len(filenames)
    if total:
        # All samples go into a single file (index 0); nothing is created
        # when there are no annotations.
        tf_filename = _get_output_filename(output_dir, name, 0)
        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            for position, filename in enumerate(filenames):
                sys.stdout.write(' Converting image %d/%d \n' % (position + 1, total))
                sys.stdout.flush()
                # Strip the extension to get the name shared with the image.
                img_name = os.path.splitext(filename)[0]
                _add_to_tfrecord(dataset_dir, img_name, tfrecord_writer)
    print('\nFinished converting the Pascal VOC dataset!')


# Source dataset path, output path and output file prefix.
dataset_dir = "./VOC2007/"
output_dir = "./TFRecords"
name = "voc_train"


def main(_):
    """Entry point invoked by ``tf.app.run``."""
    run(dataset_dir, output_dir, name)


if __name__ == '__main__':
    tf.app.run()

在獲得訓練好的模型，進行檢測時的demo.py如下：（較好）

#encoding:utf-8
"""Run a frozen object-detection graph over a few test images and show the boxes."""
import tensorflow as tf
import numpy as np
import os
from matplotlib import pyplot as plt
from PIL import Image
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_utils

# Directory of the downloaded model.
MODEL_DIR = 'object_detection/ssd_mobilenet_v1_coco_2018_01_28'
# Frozen graph file of the downloaded model.
MODEL_CHECK_FILE = os.path.join(MODEL_DIR, 'frozen_inference_graph.pb')
# Label map belonging to the dataset.
MODEL_LABEL_MAP = os.path.join('object_detection/data', 'mscoco_label_map.pbtxt')
# Number of classes in the dataset (see mscoco_label_map.pbtxt).
MODEL_NUM_CLASSES = 90

# Paths of the sample images image1.jpg ... image5.jpg.
PATH_TO_TEST_IMAGES_DIR = 'object_detection/test_images'
TEST_IMAGES_PATHS = [os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
                     for i in range(1, 6)]

# Output figure size, in inches.
IMAGE_SIZE = (12, 8)

tf.reset_default_graph()

# Load the frozen model into the default graph.
with tf.gfile.GFile(MODEL_CHECK_FILE, 'rb') as fd:
    _graph = tf.GraphDef()
    _graph.ParseFromString(fd.read())
    tf.import_graph_def(_graph, name='')

# Load the COCO labels, turning mscoco_label_map.pbtxt into
# {1: {'id': 1, 'name': u'person'}...90: {'id': 90, 'name': u'toothbrush'}}
label_map = label_map_util.load_labelmap(MODEL_LABEL_MAP)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=MODEL_NUM_CLASSES)
category_index = label_map_util.create_category_index(categories)


def load_image_into_numpy_array(image):
    """Return ``image`` (a PIL image) as a uint8 array of shape (height, width, 3).

    The image is converted to RGB first so that grayscale, palette or RGBA
    inputs do not break the fixed three-channel reshape.
    """
    rgb_image = image.convert('RGB')
    (im_width, im_height) = rgb_image.size
    return np.array(rgb_image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)


# Run the detections inside the graph.
detection_graph = tf.get_default_graph()
with tf.Session(graph=detection_graph) as sess:
    # Graph tensors are the same for every image, so look them up once.
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # All detection boxes.
    boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Confidence of each detection.
    scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
    # Class of each box.
    classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
    # Number of detections.
    num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')

    for image_path in TEST_IMAGES_PATHS:
        print(image_path)
        # Read the picture and turn it into an array; the context manager
        # closes the underlying file once the pixels have been copied.
        with Image.open(image_path) as image:
            image_np = load_image_into_numpy_array(image)
        # Add the leading axis fed to image_tensor.
        image_np_expanded = np.expand_dims(image_np, axis=0)

        # Run the graph.
        (boxes, scores, classes, num_detections) = sess.run(
            [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor],
            feed_dict={image_tensor: image_np_expanded})

        # Print the raw results.
        print(num_detections)
        print(boxes)
        print(classes)
        print(scores)

        # Draw the boxes and labels onto image_np.
        vis_utils.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8)

        # Show the result.
        plt.figure(figsize=IMAGE_SIZE)
        plt.imshow(image_np)
        plt.show()

恩，復習+鞏固！

sweet~

總結(jié)

以上是生活随笔為你收集整理的将自己手动标注的数据集(PascalVOC格式)转化为.TFRecord格式的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：理解 Linux 中的关机命令
下一篇：笔记本键盘被锁定了怎么办？笔记本如何解锁