LFW dataset -> rec file
- Dataset preparation:
- Raw data download (two sources):
- http://vis-www.cs.umass.edu/lfw/index.html#download
- Pre-aligned bin file: https://download.csdn.net/download/ustczhng2012/15002984
- Data alignment
- Align images using the original image plus landmark points
- The raw LFW data downloaded directly comes without landmark points, so you generally need a landmark model to predict the keypoints, then rotate and scale the faces into alignment before generating the bin file. The standard InsightFace-style alignment script is below; a usage sketch with a landmark detector follows the script.
import cv2
import numpy as np
from skimage import transform as trans

src1 = np.array([[51.642, 50.115], [57.617, 49.990], [35.740, 69.007],
                 [51.157, 89.050], [57.025, 89.702]], dtype=np.float32)  # <-- left
src2 = np.array([[45.031, 50.118], [65.568, 50.872], [39.677, 68.111],
                 [45.177, 86.190], [64.246, 86.758]], dtype=np.float32)  # --- frontal
src3 = np.array([[39.730, 51.138], [72.270, 51.138], [56.000, 68.493],
                 [42.463, 87.010], [69.537, 87.010]], dtype=np.float32)  # --> right
src4 = np.array([[46.845, 50.872], [67.382, 50.118], [72.737, 68.111],
                 [48.167, 86.758], [67.236, 86.190]], dtype=np.float32)  # --> right profile
src5 = np.array([[54.796, 49.990], [60.771, 50.115], [76.673, 69.007],
                 [55.388, 89.702], [61.257, 89.050]], dtype=np.float32)

src = np.array([src1, src2, src3, src4, src5])
src_map = {112: src, 224: src * 2}

arcface_src = np.array([[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
                        [41.5493, 92.3655], [70.7299, 92.2041]], dtype=np.float32)
arcface_src = np.expand_dims(arcface_src, axis=0)


# lmk is prediction; src is template
def estimate_norm(lmk, image_size=112, mode='arcface'):
    assert lmk.shape == (5, 2)
    tform = trans.SimilarityTransform()
    lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1)
    min_M = []
    min_index = []
    min_error = float('inf')
    if mode == 'arcface':
        assert image_size == 112
        src = arcface_src
    else:
        src = src_map[image_size]
    for i in np.arange(src.shape[0]):
        tform.estimate(lmk, src[i])
        M = tform.params[0:2, :]
        results = np.dot(M, lmk_tran.T)
        results = results.T
        error = np.sum(np.sqrt(np.sum((results - src[i]) ** 2, axis=1)))
        # print(error)
        if error < min_error:
            min_error = error
            min_M = M
            min_index = i
    return min_M, min_index


def norm_crop(img, landmark, image_size=112, mode='arcface'):
    M, pose_index = estimate_norm(landmark, image_size, mode)
    warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
    return warped
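The post does not pin down a landmark model. As one concrete option, here is a minimal usage sketch that predicts the five keypoints with MTCNN from the facenet-pytorch package and feeds them to norm_crop above; the package choice and the sample path are assumptions of this sketch, not part of the original workflow:

import cv2
import numpy as np
from facenet_pytorch import MTCNN  # assumed landmark detector; any 5-point model works

detector = MTCNN()
img_bgr = cv2.imread('lfw/Aaron_Eckhart/Aaron_Eckhart_0001.jpg')  # hypothetical sample path
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # MTCNN expects RGB input
boxes, probs, landmarks = detector.detect(img_rgb, landmarks=True)
# landmarks[0] has shape (5, 2): left eye, right eye, nose, left/right mouth corner,
# the same point order as the templates above
aligned = norm_crop(img_bgr, landmarks[0].astype(np.float32))  # 112x112 aligned BGR face

Any detector that outputs the same five points in the same order can be swapped in.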
- Generate the rec and idx files
- You can follow the emore.rec generation procedure. With that method the alignment script above is not needed, because the tooling ships with its own alignment script; you only have to prepare the image-list txt configuration file. A minimal sketch of the core recordio write follows.
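For reference, this is a minimal sketch of the core rec/idx write using MXNet's recordio API. The directory layout (one sub-directory of aligned faces per identity) and the paths are assumptions of this sketch; the real emore tooling additionally writes header records with identity ranges and a property file, which are omitted here:

import os
import cv2
import mxnet as mx

root = 'dataset/train_aligned'  # hypothetical: one sub-directory of 112x112 faces per identity
record = mx.recordio.MXIndexedRecordIO('train.idx', 'train.rec', 'w')
idx = 0
for label, person in enumerate(sorted(os.listdir(root))):
    for name in sorted(os.listdir(os.path.join(root, person))):
        img = cv2.imread(os.path.join(root, person, name))
        # pack the image as JPEG together with its numeric identity label
        header = mx.recordio.IRHeader(flag=0, label=float(label), id=idx, id2=0)
        record.write_idx(idx, mx.recordio.pack_img(header, img, quality=95, img_fmt='.jpg'))
        idx += 1
record.close()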
- Create the pairs file
import glob
import os
import os.path
import numpy as np


def find_not_zero_pos(sstr):
    # index of the first non-'0' character (used to strip leading zeros)
    for i in range(len(sstr)):
        if sstr[i] != "0":
            return i
    return len(sstr) - 1


def get_real_str(sstr):
    i = find_not_zero_pos(sstr)
    return sstr[i:]


def create_match_content():
    """Positive pairs: two different images of the same identity."""
    matched_result = set()
    k = 0
    sub_dirs = [x[0] for x in os.walk(INPUT_DATA)]
    while len(matched_result) < 3000:
        for sub_dir in sub_dirs[1:]:
            extensions = 'jpg'
            file_list = []
            dir_name = os.path.basename(sub_dir)
            file_glob = os.path.join(INPUT_DATA, dir_name, '*.' + extensions)
            # glob.glob(file_glob) lists every image under this identity's directory
            file_list.extend(glob.glob(file_glob))
            if not file_list:
                continue
            if len(file_list) >= 2:
                label_name = dir_name
                length = len(file_list)
                random_number1 = np.random.randint(length)
                random_number2 = np.random.randint(length)
                while random_number1 == random_number2:
                    random_number1 = np.random.randint(length)
                    random_number2 = np.random.randint(length)
                base_name1 = os.path.basename(file_list[random_number1 % length])
                base_name2 = os.path.basename(file_list[random_number2 % length])
                if file_list[random_number1 % length] != file_list[random_number2 % length]:
                    base_name1 = base_name1.split("_")[-1].split(".")[0]
                    base_name2 = base_name2.split("_")[-1].split(".")[0]
                    matched_result.add(label_name + ' ' + get_real_str(base_name1)
                                       + ' ' + get_real_str(base_name2))
                    k = k + 1
    return matched_result, k


def create_unmatch_content():
    """Negative pairs: images drawn from two different identities."""
    unmatched_result = set()
    k = 0
    while len(unmatched_result) < 3000:
        sub_dirs = [x[0] for x in os.walk(INPUT_DATA)]
        length_of_dir = len(sub_dirs)
        for j in range(24):
            for i in range(1, length_of_dir):
                class1 = sub_dirs[i]
                random_number = np.random.randint(length_of_dir)
                # re-draw until we get a different, non-root directory
                while random_number == 0 or random_number == i:
                    random_number = np.random.randint(length_of_dir)
                class2 = sub_dirs[random_number]
                class1_name = os.path.basename(class1)
                class2_name = os.path.basename(class2)
                extensions = 'jpg'
                file_list1 = []
                file_list2 = []
                file_glob1 = os.path.join(INPUT_DATA, class1_name, '*.' + extensions)
                file_list1.extend(glob.glob(file_glob1))
                file_glob2 = os.path.join(INPUT_DATA, class2_name, '*.' + extensions)
                file_list2.extend(glob.glob(file_glob2))
                if file_list1 and file_list2:
                    base_name1 = os.path.basename(file_list1[j % len(file_list1)])
                    base_name2 = os.path.basename(file_list2[j % len(file_list2)])
                    base_name1 = get_real_str(base_name1.split("_")[-1].split(".")[0])
                    base_name2 = get_real_str(base_name2.split("_")[-1].split(".")[0])
                    s = class2_name + ' ' + base_name2 + ' ' + class1_name + ' ' + base_name1
                    if s not in unmatched_result:
                        unmatched_result.add(s)
                        if len(unmatched_result) > 3000:
                            break
                        k = k + 1
    return unmatched_result, k


if __name__ == '__main__':
    INPUT_DATA = r'dataset/val/lfw-deepfunneled'
    txt_path = 'dataset/val/pairs.txt'
    if os.path.isfile(txt_path):
        os.remove(txt_path)
    result, k1 = create_match_content()
    print(k1)
    result_un, k2 = create_unmatch_content()
    print(k2)
    result1 = list(result)
    result2 = list(result_un)
    with open(txt_path, 'w') as file:
        # 10 folds, 300 positive and 300 negative pairs per fold
        file.write('10 300\n')
        for i in range(10):
            for pair in result1[i * 300:i * 300 + 300]:
                file.write(pair + '\n')
            for pair in result2[i * 300:i * 300 + 300]:
                file.write(pair + '\n')
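The script writes pairs.txt in the LFW convention that the bin script below consumes: a header line "10 300", then 3-field lines "name idx1 idx2" for same-identity pairs and 4-field lines "name1 idx1 name2 idx2" for cross-identity pairs. Indices are written without leading zeros; the bin script restores the 4-digit form with zfill(4). With illustrative names it looks like:

10 300
Aaron_Peirsol 1 2
Tony_Blair 3 George_W_Bush 1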
- Generate the validation-set bin file
- Note: the bin file stores the images as cv2-written JPEG bytes; decoding them with cv2 yields BGR arrays, so mind the channel order when consuming them (see the read-back sketch after the script below).
import os
import pickle

import cv2
from tqdm import tqdm

if __name__ == '__main__':
    lfw_dir = r'dataset/val/lfw-deepfunneled'
    image_size = (112, 112)
    pairs_path = r'dataset/val/pairs.txt'
    bin_output = r'dataset/val/lfw.bin'
    lfw_bins = []
    issame_list = []
    with open(pairs_path, 'r') as f:
        lines = f.readlines()
    line = lines[0].strip().split(' ')  # header line, e.g. "10 300" (not used further)
    for i in tqdm(range(1, len(lines))):
        temp = lines[i].strip().split(' ')
        if len(temp) == 3:
            # positive pair: both images belong to identity temp[0]
            img_path_1 = os.path.join(lfw_dir, temp[0], temp[0] + '_' + temp[1].zfill(4) + '.jpg')
            img_1 = cv2.imread(img_path_1)
            img_1 = cv2.resize(img_1, image_size)
            img_save_path_1 = os.path.join(lfw_dir, temp[0], temp[0] + '_' + temp[1].zfill(4) + '_resize.jpg')
            cv2.imwrite(img_save_path_1, img_1)
            with open(img_save_path_1, 'rb') as fin:
                _bin_1 = fin.read()
            lfw_bins.append(_bin_1)
            img_path_2 = os.path.join(lfw_dir, temp[0], temp[0] + '_' + temp[2].zfill(4) + '.jpg')
            img_2 = cv2.imread(img_path_2)
            img_2 = cv2.resize(img_2, image_size)
            img_save_path_2 = os.path.join(lfw_dir, temp[0], temp[0] + '_' + temp[2].zfill(4) + '_resize.jpg')
            cv2.imwrite(img_save_path_2, img_2)
            with open(img_save_path_2, 'rb') as fin:
                _bin_2 = fin.read()
            lfw_bins.append(_bin_2)
            issame_list.append(1)
        elif len(temp) == 4:
            # negative pair: temp[0]/temp[1] versus temp[2]/temp[3]
            img_path_1 = os.path.join(lfw_dir, temp[0], temp[0] + '_' + temp[1].zfill(4) + '.jpg')
            img_1 = cv2.imread(img_path_1)
            img_1 = cv2.resize(img_1, image_size)
            img_save_path_1 = os.path.join(lfw_dir, temp[0], temp[0] + '_' + temp[1].zfill(4) + '_resize.jpg')
            cv2.imwrite(img_save_path_1, img_1)
            with open(img_save_path_1, 'rb') as fin:
                _bin_1 = fin.read()
            lfw_bins.append(_bin_1)
            img_path_2 = os.path.join(lfw_dir, temp[2], temp[2] + '_' + temp[3].zfill(4) + '.jpg')
            img_2 = cv2.imread(img_path_2)
            img_2 = cv2.resize(img_2, image_size)
            img_save_path_2 = os.path.join(lfw_dir, temp[2], temp[2] + '_' + temp[3].zfill(4) + '_resize.jpg')
            cv2.imwrite(img_save_path_2, img_2)
            with open(img_save_path_2, 'rb') as fin:
                _bin_2 = fin.read()
            lfw_bins.append(_bin_2)
            issame_list.append(0)
        else:
            print("drop this line: %d" % i)
            continue
    with open(bin_output, 'wb') as f:
        pickle.dump((lfw_bins, issame_list), f, protocol=pickle.HIGHEST_PROTOCOL)
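To sanity-check the generated bin, and to see the channel-order caveat in practice, here is a minimal read-back sketch, assuming the same output path as above:

import pickle

import cv2
import numpy as np

with open('dataset/val/lfw.bin', 'rb') as f:
    bins, issame_list = pickle.load(f)  # JPEG byte strings plus 0/1 same-identity flags

img = cv2.imdecode(np.frombuffer(bins[0], np.uint8), cv2.IMREAD_COLOR)  # decodes to BGR
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # convert only if the model expects RGB
print(len(bins), len(issame_list), img.shape)  # expected: 12000 6000 (112, 112, 3)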