百度paddle框架学习(二):使用经典VGG网络完成人脸口罩判别
生活随笔
收集整理的這篇文章主要介紹了
百度paddle框架学习(二):使用经典VGG网络完成人脸口罩判别
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
一、數據集來源
百度AI Studio平臺口罩人臉分類數據集:https://aistudio.baidu.com/aistudio/datasetdetail/22392
二、目錄結構與項目環境
1、目錄結構
2、項目環境
import os
import json
import random
import paddle
import zipfile
import numpy as np
import paddle.fluid as fluid
import matplotlib.pyplot as plt
from PIL import Image
三、數據處理與參數配置
1、參數配置
# --- Hyper-parameter configuration -------------------------------------
train_parameters = {
    "input_size": [3, 224, 224],        # input image shape (C, H, W)
    "class_dim": -1,                    # number of classes, filled in later
    "src_path": './maskDetect.zip',     # raw dataset archive
    "target_path": './',                # extraction destination
    "train_list_path": './train.txt',   # training list file
    "eval_list_path": './eval.txt',     # evaluation list file
    "readme_path": './readme.json',     # dataset summary json
    "label_dict": {},                   # label index -> class name
    "num_epochs": 5,                    # training epochs
    "train_batch_size": 5,              # mini-batch size
    "learning_strategy": {              # optimizer settings
        "lr": 0.001                     # learning rate
    }
}

# --- Shortcuts for the most frequently used entries --------------------
_cfg = train_parameters
src_path, target_path = _cfg['src_path'], _cfg['target_path']
train_list_path, eval_list_path = _cfg['train_list_path'], _cfg['eval_list_path']
batch_size = _cfg['train_batch_size']
2、數據處理
· 解壓數據集
def unzip_data(src_path, traget_path):
    """Extract the raw dataset archive into ``traget_path``.

    Does nothing when a ``maskDetect`` directory already exists there,
    so repeated runs do not re-extract.

    NOTE: the parameter name ``traget_path`` is a historical typo kept
    unchanged for call-site compatibility.
    """
    if not os.path.isdir(traget_path + "maskDetect"):
        # Context manager guarantees the archive handle is closed even
        # if extraction raises (the original open/close pair leaked on
        # error).
        with zipfile.ZipFile(src_path, 'r') as z:
            z.extractall(path=traget_path)
unzip_data(src_path, target_path)
· 生成數據列表
def get_data_list(target_path, train_list_path, eval_list_path):
    """Scan the extracted dataset and generate the train/eval list files.

    Walks every class folder under ``target_path + 'maskDetect/'`` and
    writes "image_path label" lines: one of every 10 images per class goes
    to the eval list, the rest to the train list. Also fills in
    ``train_parameters['label_dict']`` and ``['class_dim']`` and dumps a
    summary json to ``train_parameters['readme_path']``.

    NOTE: lines are appended ('a' mode) — callers must truncate the list
    files first to avoid duplicates across runs.
    """
    class_detail = []                       # per-class metadata for the json
    data_list_path = target_path + 'maskDetect/'
    class_dirs = os.listdir(data_list_path)  # e.g. ['maskimages', 'nomaskimages']
    all_class_images = 0                    # total image count
    class_label = 0                         # numeric label assigned per class
    class_dim = 0                           # number of classes found
    trainer_list = []                       # lines destined for train.txt
    eval_list = []                          # lines destined for eval.txt
    for class_dir in class_dirs:
        # Skip the macOS metadata entry that may appear in the folder.
        if class_dir != '.DS_Store':
            class_dim += 1
            class_detail_list = {}
            eval_sum = 0                    # eval images in this class
            trainer_sum = 0                 # train images in this class
            class_sum = 0                   # images seen in this class
            path = data_list_path + class_dir
            img_paths = os.listdir(path)
            for img_path in img_paths:
                name_path = path + '/' + img_path
                # Every 10th image goes into the evaluation set.
                # (FIX: the original '\n' literal was broken across lines
                # by the article extraction — reconstructed here.)
                if class_sum % 10 == 0:
                    eval_sum += 1
                    eval_list.append('%s %d\n' % (name_path, class_label))
                else:
                    trainer_sum += 1
                    trainer_list.append('%s %d\n' % (name_path, class_label))
                class_sum += 1
                all_class_images += 1
            # Per-class entry for the readme json.
            class_detail_list['class_name'] = class_dir
            class_detail_list['class_label'] = class_label
            class_detail_list['class_eval_images'] = eval_sum
            class_detail_list['class_trainer_images'] = trainer_sum
            class_detail.append(class_detail_list)
            # Record the label -> class-name mapping.
            train_parameters['label_dict'][str(class_label)] = class_dir
            class_label += 1
    train_parameters['class_dim'] = class_dim
    # Shuffle so batches are not ordered by class.
    random.shuffle(eval_list)
    with open(eval_list_path, 'a') as f:
        for eval_image in eval_list:
            f.write(eval_image)
    random.shuffle(trainer_list)
    with open(train_list_path, 'a') as ff:
        for train_image in trainer_list:
            ff.write(train_image)
    # Dataset summary json.
    readjson = {'all_class_name': data_list_path,
                'all_class_images': all_class_images,
                'class_detail': class_detail}
    jsons = json.dumps(readjson, sort_keys=True, indent=4, separators=(',', ': '))
    with open(train_parameters['readme_path'], 'w') as f:
        f.write(jsons)
    print('生成數據列表完成.')
# Split into train/eval sets, shuffle, and generate the data list files.
# train.txt and eval.txt are cleared first because get_data_list appends
# ('a' mode) — otherwise repeated runs would accumulate duplicate lines.
# NOTE(review): opening with mode 'w' already truncates the file, so the
# explicit seek(0)/truncate() calls below are redundant but harmless.
with open(train_list_path, 'w') as f:
    f.seek(0)        # move the cursor to the start of the file
    f.truncate()     # drop everything after the cursor
with open(eval_list_path, 'w') as ff:
    ff.seek(0)
    ff.truncate()
# Generate the data lists.
get_data_list(target_path, train_list_path, eval_list_path)
生成的readme.json文件內容:
{
"all_class_images": 185,
"all_class_name": "./maskDetect/",
"class_detail": [
{
"class_eval_images": 12,
"class_label": 0,
"class_name": "maskimages",
"class_trainer_images": 103
},
{
"class_eval_images": 7,
"class_label": 1,
"class_name": "nomaskimages",
"class_trainer_images": 63
}
]
}
生成的train.txt文件內容:
生成的eval.txt文件內容:
· 定義并構造數據提供器
def custom_reader(file_list):
    """Build a sample reader over the list file at ``file_list``.

    Each line of the file is "<image_path> <label>". The returned callable
    is a generator factory yielding (image, label) pairs where the image is
    a float32 CHW array scaled to [0, 1] and the label an int.
    """
    def reader():
        with open(file_list, 'r') as f:
            lines = [line.strip() for line in f]
        for line in lines:
            if not line:
                # Skip blank lines (e.g. a trailing newline).
                continue
            # rsplit keeps image paths containing spaces intact
            # (the original split(' ') broke on such paths).
            img_path, lab = line.rsplit(' ', 1)
            img = Image.open(img_path)
            if img.mode != 'RGB':
                # Normalize color space (some images are RGBA/grayscale,
                # which would break the fixed 3-channel pipeline).
                img = img.convert('RGB')
            img = img.resize((224, 224), Image.BILINEAR)
            img = np.array(img).astype('float32')
            img = img.transpose((2, 0, 1))  # HWC -> CHW
            img = img / 255                 # scale to [0, 1]
            yield img, int(lab)
    return reader
# Build the batched data providers. drop_last discards the final partial
# batch so every batch delivered to the model has exactly batch_size samples.
train_reader = paddle.batch(reader=custom_reader(train_list_path),
                            batch_size=batch_size,
                            drop_last=True)
eval_reader = paddle.batch(reader=custom_reader(eval_list_path),
                           batch_size=batch_size,
                           drop_last=True)
說明:數據預處理已合并在數據提供器定義函數之中,詳細情況請參考上述代碼的注釋部分。
四、網絡模型構建
1、VGG16模型結構
2、池化-卷積類構建
# 池化-卷積類
class ConvPool(fluid.dygraph.Layer):
    """A stack of consecutive Conv2D layers followed by one pooling layer.

    VGG applies several same-sized convolutions before each pooling step,
    so this layer bundles ``groups`` convolutions plus a single Pool2D.
    """

    def __init__(self,
                 num_channels,      # input channel count
                 num_filters,       # number of convolution kernels
                 filter_size,       # convolution kernel size
                 pool_size,         # pooling window size
                 pool_stride,       # pooling stride
                 groups,            # number of consecutive convolutions
                 conv_stride=1,
                 conv_padding=1,
                 act=None,          # activation applied after each conv
                 pool_type='max'):  # pooling type, max by default
        super(ConvPool, self).__init__()

        self._conv2d_list = []
        in_channels = num_channels
        for idx in range(groups):
            # add_sublayer registers the conv as a child layer so its
            # parameters are tracked; the 'bb_%d' names must stay stable
            # for saved state-dict compatibility.
            sub = self.add_sublayer(
                'bb_%d' % idx,
                fluid.dygraph.Conv2D(num_channels=in_channels,
                                     num_filters=num_filters,
                                     filter_size=filter_size,
                                     stride=conv_stride,
                                     padding=conv_padding,
                                     act=act))
            self._conv2d_list.append(sub)
            # Every convolution after the first consumes num_filters channels.
            in_channels = num_filters

        self._pool2d = fluid.dygraph.Pool2D(pool_size=pool_size,
                                            pool_type=pool_type,
                                            pool_stride=pool_stride)

    def forward(self, inputs):
        """Apply the convolution stack, then the pooling layer."""
        out = inputs
        for conv_layer in self._conv2d_list:
            out = conv_layer(out)
        return self._pool2d(out)
說明:由于vgg16結構存在連續卷積,并且連續卷積后都跟有一池化層,結構比較有規律,因此使用一個池化-卷積類封裝卷積和池化層詳細參數請參考上述代碼及注釋。
3、網絡模型構建
# VGG16網絡
class VGGNet(fluid.dygraph.Layer):
    """VGG16 for binary (mask / no-mask) classification.

    Five ConvPool stages followed by three fully connected layers; the
    final layer outputs softmax probabilities over 2 classes.
    """

    def __init__(self):
        super(VGGNet, self).__init__()
        # ConvPool args: (in_channels, filters, 3x3 kernel,
        #                 2x2 pool, pool stride 2, n consecutive convs)
        self.convpool01 = ConvPool(3, 64, 3, 2, 2, 2, act='relu')
        self.convpool02 = ConvPool(64, 128, 3, 2, 2, 2, act='relu')
        self.convpool03 = ConvPool(128, 256, 3, 2, 2, 3, act='relu')
        self.convpool04 = ConvPool(256, 512, 3, 2, 2, 3, act='relu')
        self.convpool05 = ConvPool(512, 512, 3, 2, 2, 3, act='relu')
        # After five 2x poolings a 224x224 input becomes 512 x 7 x 7.
        self.pool_5_shape = 512 * 7 * 7
        self.fc01 = fluid.dygraph.Linear(self.pool_5_shape, 4096, act='relu')
        self.fc02 = fluid.dygraph.Linear(4096, 4096, act='relu')
        self.fc03 = fluid.dygraph.Linear(4096, 2, act='softmax')

    def forward(self, inputs, label=None):
        """Run a forward pass; also return batch accuracy when a label is given."""
        feat = inputs
        for stage in (self.convpool01, self.convpool02, self.convpool03,
                      self.convpool04, self.convpool05):
            feat = stage(feat)
        # Flatten the feature map for the fully connected head.
        feat = fluid.layers.reshape(feat, shape=[-1, 512 * 7 * 7])
        feat = self.fc01(feat)
        feat = self.fc02(feat)
        out = self.fc03(feat)
        if label is None:
            return out
        acc = fluid.layers.accuracy(input=out, label=label)
        return out, acc
五、模型訓練中的參數可視化
1、定義參數容器
# Running counters used later to visualize training progress.
all_train_iter = 0    # cumulative number of training samples processed
all_train_iters = []  # x-axis values (sample counts)
all_train_costs = []  # recorded cost values
all_train_accs = []   # recorded accuracy values
2、參數可視化函數定義
# 參數變化的可視化
def draw_train_process(title, iters, costs, accs, label_cost, label_acc):
    """Plot training cost and accuracy curves on a single figure."""
    plt.title(title, fontsize=24)
    plt.xlabel('iter', fontsize=20)
    plt.ylabel('cost/acc', fontsize=20)
    # Draw both series with their fixed colors and supplied legend labels.
    for series, line_color, tag in ((costs, 'red', label_cost),
                                    (accs, 'green', label_acc)):
        plt.plot(iters, series, color=line_color, label=tag)
    plt.legend()
    plt.grid()
    plt.show()
def draw_process(title, color, iters, data, label):
    """Plot a single training metric (cost or accuracy) over iterations."""
    plt.title(title, fontsize=24)
    plt.xlabel('iter', fontsize=20)
    plt.ylabel(label, fontsize=20)
    plt.plot(iters, data, color=color, label=label)
    plt.legend()
    plt.grid()
    plt.show()
說明:有關參數可視化函數的調用,將在訓練部分代碼中展示,稍安勿躁。
六、模型訓練與評估
1、模型訓練
# --- Training loop (dygraph mode) --------------------------------------
with fluid.dygraph.guard():
    print(train_parameters['class_dim'])
    print(train_parameters['label_dict'])
    vgg = VGGNet()
    vgg.train()  # switch to training mode
    optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=train_parameters['learning_strategy']['lr'],
        parameter_list=vgg.parameters())
    for epoch_num in range(train_parameters["num_epochs"]):
        for batch_id, data in enumerate(train_reader()):
            # Batch is a list of (image, label) pairs from the reader.
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([x[1] for x in data]).astype('int64')
            y_data = y_data[:, np.newaxis]  # cross_entropy expects shape (N, 1)
            # Convert numpy arrays into dygraph variables.
            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)
            out, acc = vgg(img, label)
            loss = fluid.layers.cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)
            # Backprop, parameter update, then zero the gradients so the
            # next step starts clean.
            avg_loss.backward()
            optimizer.minimize(avg_loss)
            vgg.clear_gradients()
            # Record per-batch statistics for visualization.
            all_train_iter = all_train_iter + train_parameters["train_batch_size"]
            all_train_iters.append(all_train_iter)
            # FIX: record the batch-mean loss; the original stored
            # loss.numpy()[0], i.e. only the first sample's loss.
            all_train_costs.append(avg_loss.numpy()[0])
            all_train_accs.append(acc.numpy()[0])
            if batch_id % 10 == 0:
                print('Loss at epoch {} step {}: {}, acc: {}'.format(
                    epoch_num, batch_id, avg_loss.numpy(), acc.numpy()))
    # Visualize the recorded metrics (legend typo 'traing_acc' fixed).
    draw_train_process('training', all_train_iters, all_train_costs,
                       all_train_accs, 'training cost', 'training acc')
    draw_process('training loss', 'red', all_train_iters, all_train_costs, 'training loss')
    draw_process('training acc', 'green', all_train_iters, all_train_accs, 'training acc')
    # Persist the trained parameters (creates vgg.pdparams).
    fluid.save_dygraph(vgg.state_dict(), 'vgg')
    # FIX: the original string literal was broken across source lines by
    # the article extraction; reconstructed with an explicit '\n'.
    print('Final loss: {}'.format(avg_loss.numpy()), '\nEnd of training')
參數可視化:
2、模型校驗
# --- Model validation on the eval set ----------------------------------
with fluid.dygraph.guard():
    model, _ = fluid.load_dygraph('vgg')
    vgg = VGGNet()
    vgg.load_dict(model)
    vgg.eval()  # inference mode
    accs = []
    for batch_id, data in enumerate(eval_reader()):
        dy_x_data = np.array([x[0] for x in data]).astype('float32')
        y_data = np.array([x[1] for x in data]).astype('int64')
        y_data = y_data[:, np.newaxis]  # accuracy expects shape (N, 1)
        img = fluid.dygraph.to_variable(dy_x_data)
        label = fluid.dygraph.to_variable(y_data)
        out, acc = vgg(img, label)
        # (Removed an unused `lab = np.argsort(out.numpy())` left over
        # from debugging in the original.)
        accs.append(acc.numpy()[0])
    print('驗證集平均準確率: ', np.mean(accs))
七、模型預測
1、預測圖像預處理
def load_image(img_path):
    """Load and preprocess one image for prediction.

    Mirrors the training reader's preprocessing: forced RGB, 224x224
    bilinear resize, CHW layout, float32 scaled to [0, 1].
    """
    im = Image.open(img_path)
    # Force RGB so RGBA / grayscale inputs become three channels.
    if im.mode != 'RGB':
        im = im.convert('RGB')
    im = im.resize((224, 224), Image.BILINEAR)
    arr = np.array(im).astype('float32')
    arr = arr.transpose((2, 0, 1))  # HWC -> CHW
    return arr / 255                # scale to [0, 1]
2、載入模型預測結果
# --- Inference with the saved model ------------------------------------
label_dict = train_parameters['label_dict']
with fluid.dygraph.guard():
    model, _ = fluid.dygraph.load_dygraph('vgg')
    vgg = VGGNet()
    vgg.load_dict(model)
    vgg.eval()  # inference mode
    # Show the image that will be predicted.
    infer_path = './2.jpg'
    img = Image.open(infer_path)
    plt.imshow(img)
    plt.show()
    # Preprocess the prediction image(s).
    infer_imgs = [load_image(infer_path)]
    infer_imgs = np.array(infer_imgs)
    for i in range(len(infer_imgs)):
        data = infer_imgs[i]
        dy_x_data = np.array(data).astype('float32')
        dy_x_data = dy_x_data[np.newaxis, :, :, :]  # add batch dimension
        img = fluid.dygraph.to_variable(dy_x_data)
        out = vgg(img)
        # BUG FIX: the original called np.argmax(out.numpy) on the bound
        # method object itself; the method must be invoked to get the
        # ndarray before taking the argmax.
        lab = np.argmax(out.numpy())
        print('第{}個樣本,被預測為: {}'.format(i+1, label_dict[str(lab)]))
print('預測結束')
測試圖像:
訓練過程與結果預測:
說明:相對于數據集(較小)來言,vgg16網絡模型是一個十分復雜的模型,所以模型訓練的最終結果并不理想,如果使用更大的數據集會得到更可觀的訓練效果。
總結
以上是生活随笔為你收集整理的百度paddle框架学习(二):使用经典VGG网络完成人脸口罩判别的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 自然语言处理之 nltk 英文分句、分词
- 下一篇: 开发中的“软”与“硬”:高画质移动游戏开