三、python中最基础的文件处理汇总
生活随笔
收集整理的這篇文章主要介紹了
三、python中最基础的文件处理汇总
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
文章目錄
- 前言
- 一、分配train和val的訓練數據
- 二、將一個文件夾的文件按比例復制給多個文件夾
- 三、多個txt文件的標簽內容合并在一個txt文件
- 四、圖片壓縮(有損壓縮和無損壓縮)
- 五、多個txt文件、圖片內容轉到1個excel文件
前言
最基本的文件操作。
提示:以下是本篇文章正文內容,下面案例可供參考
一、分配train和val的訓練數據
一個文件夾的文件按比例分為train和val數據。
"""Randomly move a share of image/label pairs into a validation set.

With the default ratio, 15% of the pairs go to val (train:val = 8.5:1.5).
"""
import glob
import os
import random
import shutil

# File extensions that count as images when sampling the source folder.
_IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff")


def moveFile(all_img_dir, val_img_dir, all_txt_dir, val_txt_dir, ratio=0.15):
    """Move a random sample of images and their label files to the val folders.

    Args:
        all_img_dir: Directory (not a glob pattern) holding the images.
        val_img_dir: Destination directory for the sampled images.
        all_txt_dir: Directory holding one ``<stem>.txt`` label per image.
            May be the same directory as ``all_img_dir``.
        val_txt_dir: Destination directory for the sampled label files.
        ratio: Fraction of the images to move; the count is truncated to int.

    Raises:
        FileNotFoundError: If a sampled image has no label file.
    """
    # Only sample image files: images and labels may share one folder, and
    # sampling a label file would otherwise try to move it twice.
    candidates = sorted(
        name for name in os.listdir(all_img_dir)
        if name.lower().endswith(_IMAGE_EXTS)
    )
    picknumber = int(len(candidates) * ratio)

    os.makedirs(val_img_dir, exist_ok=True)
    os.makedirs(val_txt_dir, exist_ok=True)

    for name in random.sample(candidates, picknumber):
        # splitext handles extensions of any length (e.g. ".jpeg").
        txt_name = os.path.splitext(name)[0] + ".txt"
        # Move the label first: if it is missing this raises before the image
        # has been moved, so no orphaned image ends up in the val folder.
        shutil.move(os.path.join(all_txt_dir, txt_name),
                    os.path.join(val_txt_dir, txt_name))
        shutil.move(os.path.join(all_img_dir, name),
                    os.path.join(val_img_dir, name))


if __name__ == '__main__':
    # Plain directories: moveFile lists them itself, so no "*.png" patterns.
    all_img_dir = r"./data/text_recog/zyx_data/submit_data/recog/low_score_v3p1"
    val_img_dir = r"./data/text_recog/txt4val/img_gt1"
    all_txt_dir = r"./data/text_recog/zyx_data/submit_data/recog/low_score_v3p1"
    val_txt_dir = r"./data/text_recog/txt4val/label1"
    moveFile(all_img_dir, val_img_dir, all_txt_dir, val_txt_dir)
二、將一個文件夾的文件按比例復制給多個文件夾
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Copy image/label pairs from one folder into train and val folders by ratio."""
import os
import random
import glob
import shutil
from shutil import copy2


def copy_train_val(src_dir, train_img_dir, val_img_dir, train_txt_dir,
                   val_txt_dir, train_ratio=0.85):
    """Randomly split the ``*.png``/``.txt`` pairs of ``src_dir`` by copying.

    Args:
        src_dir: Folder holding the ``*.png`` images and same-stem ``.txt`` labels.
        train_img_dir: Destination for the training images.
        val_img_dir: Destination for the validation images.
        train_txt_dir: Destination for the training labels.
        val_txt_dir: Destination for the validation labels.
        train_ratio: Fraction of the pairs that goes to the training folders.

    Returns:
        A ``(train_count, val_count)`` tuple of copied pairs.
    """
    images = glob.glob(os.path.join(src_dir, "*.png"))
    num_train = len(images)
    print("num_train: " + str(num_train))
    random.shuffle(images)

    # copy2() into a missing directory would silently create one regular file
    # with that name, so make sure every destination exists first.
    for folder in (train_img_dir, val_img_dir, train_txt_dir, val_txt_dir):
        os.makedirs(folder, exist_ok=True)

    train_count = 0
    val_count = 0
    for num, img_file in enumerate(images):
        file_name = os.path.basename(img_file)
        txt_name = os.path.splitext(file_name)[0] + ".txt"
        txt_file = os.path.join(src_dir, txt_name)
        if num < num_train * train_ratio:
            copy2(img_file, train_img_dir)
            copy2(txt_file, train_txt_dir)
            print(f"train: {file_name} -------> {txt_name}")
            train_count += 1
        else:
            copy2(img_file, val_img_dir)
            copy2(txt_file, val_txt_dir)
            print(f"val: {file_name} -------> {txt_name}")
            val_count += 1

    print(f"{train_count + val_count} copy success!")
    return train_count, val_count


if __name__ == '__main__':
    # Source folder holding both the images and their txt labels.
    trainpath = r"/data2/enducation/paper_recog_total/train-paper-recog/Recognization/deep-text-recognition-SHENG/data/text_recog/zyx_data/submit_data/recog/low_score_v3p1"
    trainDir_img = r'/data/text_recog/txt4train/img_gt2'
    validDir_img = r'/data/text_recog/txt4val/img_gt2'
    trainDir_txt = r'/data/text_recog/txt4train/label2'
    validDir_txt = r'/data/text_recog/txt4val/label2'
    copy_train_val(trainpath, trainDir_img, validDir_img, trainDir_txt, validDir_txt)
分給10個文件夾。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Distribute image/label pairs from one folder evenly over numbered sub-folders."""
import os
import random
import glob
import shutil
from shutil import copy2


def copy_into_folders(src_dir, dst_root, num_folders=10):
    """Randomly copy the ``*.png``/``.txt`` pairs of ``src_dir`` into sub-folders.

    The pairs are shuffled and then copied into ``dst_root/0`` ...
    ``dst_root/<num_folders - 1>`` in equal consecutive shares; an image and
    its label always land in the same sub-folder.

    Args:
        src_dir: Folder holding the ``*.png`` images and same-stem ``.txt`` labels.
        dst_root: Parent folder of the numbered destination sub-folders.
        num_folders: How many sub-folders to distribute over.

    Returns:
        The number of pairs copied.
    """
    images = glob.glob(os.path.join(src_dir, "*.png"))
    num_train = len(images)
    print("num_train: " + str(num_train))
    random.shuffle(images)

    # Without this, copy2() would write a plain file called "0", "1", ...
    # and every later copy would overwrite it.
    for bucket in range(num_folders):
        os.makedirs(os.path.join(dst_root, str(bucket)), exist_ok=True)

    for num, img_file in enumerate(images):
        # Integer form of "num_train * k/10 <= num < num_train * (k+1)/10",
        # replacing the ten-branch elif chain without float rounding.
        bucket = min(num * num_folders // num_train, num_folders - 1)
        target = os.path.join(dst_root, str(bucket))
        file_name = os.path.basename(img_file)
        txt_name = os.path.splitext(file_name)[0] + ".txt"
        copy2(img_file, target)
        copy2(os.path.join(src_dir, txt_name), target)
        print(f"{bucket}: {file_name} -------> {txt_name}")

    print(f"{num_train} copy success!")
    return num_train


if __name__ == '__main__':
    trainpath = r"/data2/datas/ocr/text_recog/ocr_data_2021.6.30/train"
    trainDir_img = r'/data2/datas/ocr/text_recog/ocr_data_2021.6.30/all_train'
    copy_into_folders(trainpath, trainDir_img)
三、多個txt文件的標簽內容合并在一個txt文件
import os


def Combine_txt(
    dirpath_label=r"./data/text_recog/txt4train/label",
    dirpath_img=r"/data/text_recog/txt4train/img_gt",
    outpath=r"./data/text_recog/txt4train/img_gt/gt.txt",
):
    """Merge every ``.txt`` label file of a folder into one ground-truth file.

    Each label file contributes one record of the form
    ``<dirpath_img>/<stem>.png<TAB><label file content><NEWLINE>``.
    Files are processed in sorted name order so the output is reproducible.
    The total number of characters written is printed.

    Args:
        dirpath_label: Folder containing the per-image ``.txt`` label files.
        dirpath_img: Folder prefix written in front of each image name; it is
            only used to build the path string and is not accessed.
        outpath: Path of the combined output file (overwritten).
    """
    records = []
    for label_name in sorted(os.listdir(dirpath_label)):
        if not label_name.endswith(".txt"):
            continue
        img_name = os.path.splitext(label_name)[0] + ".png"
        img_path = os.path.join(dirpath_img, img_name)
        label_path = os.path.join(dirpath_label, label_name)
        with open(label_path, "r", encoding="utf-8") as label_file:
            content = label_file.read()
        # NOTE(review): content is written verbatim; if the label files end
        # with a newline the output will contain blank lines - confirm whether
        # the consumer of gt.txt expects that.
        records.append(f"{img_path}\t{content}\n")

    # Join once instead of growing a string with += inside the loop.
    res = "".join(records)
    with open(outpath, "w", encoding="utf-8") as outfile:
        outfile.write(res)
    print(len(res))


if __name__ == '__main__':
    Combine_txt()
四、圖片壓縮(有損壓縮和無損壓縮)
import cv2
import os

# Root folder; every sub-folder of it is scanned for images.
image_path = r'.\data2'


def Compress_img(proportion):
    """Re-encode every image under ``image_path`` as a JPEG of a given quality.

    Images are read from each sub-folder of ``image_path`` and written to
    ``./compress_jpg_img/<proportion>/<i>.jpg`` where ``i`` counts the saved
    images. The output folder must already exist.

    Args:
        proportion: JPEG quality in [0, 100]; lower values compress harder
            and lose more image quality.
    """
    i = 0
    for num in os.listdir(image_path):
        for img_name in os.listdir(os.path.join(image_path, num)):
            image_file = os.path.join(image_path, num, img_name)
            image = cv2.imread(image_file)
            if image is None:
                # imread returns None for unreadable / non-image files;
                # passing that to imwrite would raise.
                print(f'{proportion}---{num}---{img_name} skipped: not a readable image')
                continue
            # The output is a .jpg, so the quality flag must be the JPEG one.
            # IMWRITE_PNG_COMPRESSION (range 0-9, lossless) only affects .png
            # output and is ignored by the JPEG encoder.
            cv2.imwrite(f'./compress_jpg_img/{proportion}/{i}.jpg', image,
                        params=[cv2.IMWRITE_JPEG_QUALITY, proportion])
            print(f'{proportion}---{num}---{img_name}Saved successfully!')
            i += 1


if __name__ == '__main__':
    # JPEG quality levels to generate (valid range 0-100; smaller = stronger
    # compression and more quality loss).
    qualities = [45, 50, 55, 60, 65, 70, 75, 80, 85, 90]
    for quality in qualities:
        os.makedirs(rf'./compress_jpg_img/{quality}', exist_ok=True)
        Compress_img(quality)
五、多個txt文件、圖片內容轉到1個excel文件
轉之前:
# !/usr/bin/python
# -*- coding: utf-8 -*-
"""Write image names, label text and the images themselves into an Excel sheet.

Row ``i + 1`` of the first worksheet receives: column A the image name
without extension, column B a value taken from the matching label file,
column C the embedded image.
"""
from openpyxl import load_workbook
from openpyxl.drawing.image import Image
import os

excel_address = r"E:\enducate\practice\movefile\txt2excel\train.xlsx"
img_path = r"E:\enducate\practice\movefile\val\img_gt"
txtfile = r'E:\enducate\practice\movefile\val\label'

wb = load_workbook(excel_address)
sht = wb.worksheets[0]

# Column widths do not depend on the row, so set them once up front.
for column in ('A', 'B', 'C'):
    sht.column_dimensions[column].width = 60.0

for i, img_name in enumerate(os.listdir(img_path)):
    row = i + 1
    # splitext avoids the trailing dot that img_name[:-3] leaves behind and
    # works for extensions of any length.
    stem = os.path.splitext(img_name)[0]
    sht.cell(row, 1, stem)

    with open(os.path.join(txtfile, stem + ".txt"), "r", encoding='utf-8') as f:
        for line in f:
            # Tabs are treated like commas, then the line is split on commas.
            fields = line.strip('\n').replace("\t", ",").split(',')
            # The original wrote every field to this same cell in turn, so
            # only the last field of the last line survived; write it directly.
            # NOTE(review): if each field was meant to get its own column,
            # this needs a per-field column index - confirm the intent.
            sht.cell(row, 2, fields[-1])

    # Embed the picture anchored at column C of the current row.
    img = Image(os.path.join(img_path, img_name))
    sht.add_image(img, f'C{row}')
    sht.row_dimensions[row].height = 60.0

wb.save(excel_address)
轉之后:
總結
以上是生活随笔為你收集整理的三、python中最基础的文件处理汇总的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 二、OCR训练时,将txt文件和图片数据
- 下一篇: 一、multiprocessing.po