當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

tf2多种方式对图像数据集进行预处理

發布時間：2023/12/10 编程问答 33 豆豆

生活随笔收集整理的這篇文章主要介紹了 tf2多种方式对图像数据集进行预处理，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

import pathlib
import random

import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as keras

# Let tf.data choose the number of parallel calls dynamically at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Example: download the dataset from a URL and unpack it.
# keras.utils.get_file downloads the archive and (with untar=True) extracts it.
# data_root_orig = keras.utils.get_file(
#     origin='https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
#     fname='flower_photos', untar=True)
# data_root = pathlib.Path(data_root_orig)  # wrap the result in a Path

# If the data has already been downloaded, read the local folder directly.
data_root = pathlib.Path('flower_photos')

# Print every entry found directly under data_root.
for item in data_root.iterdir():
    print(item)

# Collect every file located one level below data_root (i.e. inside the
# sub-folders) as plain strings, then shuffle the order.
all_image_paths = [str(path) for path in data_root.glob('*/*')]
random.shuffle(all_image_paths)

image_count = len(all_image_paths)
print(image_count)  # total number of images

# Determine the label names: one label per sub-directory of data_root.
# glob() yields a generator; sorted() returns a new sorted list from it.
# is_dir() is True only when the entry is a directory.
label_names = sorted(
    item.name for item in data_root.glob('*/') if item.is_dir()
)
print(label_names)

# Assign an integer index to every label name.
label_to_index = {name: index for index, name in enumerate(label_names)}
print(label_to_index)

# The class index of an image is looked up from the name of its parent folder.
all_image_labels = [
    label_to_index[pathlib.Path(path).parent.name]
    for path in all_image_paths
]

# --------------------------------------------------
# Read a single image and inspect its properties.
img_path = all_image_paths[0]
print(img_path)

img_raw = tf.io.read_file(img_path)          # raw encoded bytes of the file
img_tensor = tf.image.decode_image(img_raw)  # decode into an image tensor
print(img_tensor.dtype)
print(img_tensor.shape)

img_final = tf.image.resize(img_tensor, [192, 192])  # resize to 192x192
img_final = img_final / 255.0                        # normalise pixel values
print(img_final.shape)
print(img_final.numpy().min())
print(img_final.numpy().max())
# -----------------------------------------------------
# tf.image.decode_image() returns a Tensor of the decoded dtype. For BMP, JPEG
# and PNG images its shape is [height, width, num_channels]; for GIF images it
# is [num_frames, height, width, 3].
# When the image format is known, tf.image.decode_jpeg (or the decoder for the
# corresponding format) can be used instead.


def preprocess_image(image):
    """Decode JPEG bytes and return a resized, normalised image tensor.

    Args:
        image: Scalar string tensor holding the encoded JPEG bytes.

    Returns:
        The image decoded with 3 channels, resized to 192x192 and divided
        by 255.0.
    """
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [192, 192])
    image /= 255.0  # normalise pixel values
    return image


def load_and_preprocess_image(path):
    """Read the file at ``path`` and run it through ``preprocess_image``."""
    image = tf.io.read_file(path)
    return preprocess_image(image)


# Build a dataset of file paths and map it to preprocessed images.
path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)

plt.figure(figsize=(8, 8))
# Show the first images of the dataset. The result of take() is itself a
# dataset, so its elements have to be retrieved by iterating over it.
# for n, image in enumerate(image_ds.take(3)):
#     plt.subplot(2, 2, n + 1)
#     plt.imshow(image)
#     plt.grid(False)
#     plt.xticks([])
#     plt.yticks([])
#     plt.xlabel((all_image_paths[n]))
#     plt.show()

label_ds = tf.data.Dataset.from_tensor_slices(
    tf.cast(all_image_labels, tf.int64)
)
# Print the names of the first 10 labels.
for label in label_ds.take(10):
    # Convert the scalar tensor to a NumPy integer before indexing the list.
    print(label_names[label.numpy()])

# Pack (image, label) pairs together.
image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))
print(image_label_ds)

# Slicing a tuple of (paths, labels) can be used instead of Dataset.zip.
ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))

# def load_and_preprocess_from_path_label(path, label):
#     return load_and_preprocess_image(path), label
# image_label_ds = ds.map(load_and_preprocess_from_path_label)
# print(image_label_ds)

# Shuffle the data and split it into batches of BATHC_SIZE elements.
# NOTE: the misspelled name is kept on purpose; later code refers to it.
BATHC_SIZE = 32

# A .shuffle placed after .repeat shuffles across epoch boundaries (some
# elements may be seen twice before others have been seen at all).
# A .shuffle placed after .batch shuffles the order of the batches, but does
# not shuffle elements between batches.
ds = image_label_ds.shuffle(buffer_size=image_count)
ds = ds.repeat()
# Fix: the batching step announced above was missing from this pipeline.
ds = ds.batch(BATHC_SIZE)
ds = ds.prefetch(buffer_size=AUTOTUNE)  # prefetch elements from the dataset
print(ds)

# The ordering concern above can be addressed with tf.data.Dataset.apply and
# the fused tf.data.experimental.shuffle_and_repeat function:
# Fused shuffle + repeat, applied through Dataset.apply.
ds = image_label_ds.apply(
    tf.data.experimental.shuffle_and_repeat(buffer_size=image_count)
)
ds = ds.batch(BATHC_SIZE)
ds = ds.prefetch(buffer_size=AUTOTUNE)
print(ds)  # compare with the dataset printed for the earlier pipeline

# ---------------- TFRecord file of raw image bytes ----------------
image_ds = tf.data.Dataset.from_tensor_slices(all_image_paths).map(
    tf.io.read_file
)
# Write the raw image data into images.tfrec.
tfrec = tf.data.experimental.TFRecordWriter('images.tfrec')
tfrec.write(image_ds)

# Read the records back and decode/resize/normalise each one.
image_ds = tf.data.TFRecordDataset('images.tfrec').map(preprocess_image)
ds = tf.data.Dataset.zip((image_ds, label_ds))
ds = ds.apply(
    tf.data.experimental.shuffle_and_repeat(buffer_size=image_count)
)
ds = ds.batch(BATHC_SIZE).prefetch(AUTOTUNE)
print(ds)

# ---------------- Speed-up using serialized tensors ----------------
paths_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
image_ds = paths_ds.map(load_and_preprocess_image)
print(image_ds)  # images as preprocessed tensors

# Serialize every tensor and write it to a TFRecord file.
# NOTE: this writes to the same 'images.tfrec' path used above.
ds = image_ds.map(tf.io.serialize_tensor)
print(ds)
tfrec = tf.data.experimental.TFRecordWriter('images.tfrec')
tfrec.write(ds)

ds = tf.data.TFRecordDataset('images.tfrec')


def parse(x):
    """Deserialize one record written above back into an image tensor.

    Args:
        x: Scalar string tensor containing a serialized float32 tensor.

    Returns:
        The parsed tensor reshaped to [192, 192, 3].
    """
    result = tf.io.parse_tensor(x, out_type=tf.float32)
    # Reshape explicitly to the 192x192x3 layout used by the preprocessing.
    result = tf.reshape(result, [192, 192, 3])
    return result


ds = ds.map(parse, num_parallel_calls=AUTOTUNE)  # parse the stored records
print(ds)

ds = tf.data.Dataset.zip((ds, label_ds))
ds = ds.apply(
    tf.data.experimental.shuffle_and_repeat(buffer_size=image_count)
)
ds = ds.batch(BATHC_SIZE).prefetch(AUTOTUNE)
print(ds)

參考來源于：用 tf.data 加載圖片 | TensorFlow Core (google.cn)

總結

以上是生活随笔為你收集整理的tf2多种方式对图像数据集进行预处理的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： win10下什么拼音输入法好用
下一篇： js导出excel单元格内换行符代码_前