pdf与图片互相转换
生活随笔
收集整理的這篇文章主要介紹了
pdf与图片互相转换
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
1.pdf轉圖片
import os
import sys
import tempfile
import time

import cv2
import numpy as np
from PIL import Image
from pdf2image import convert_from_bytes

# Disable Pillow's pixel-count limit so very large rendered pages can be opened.
Image.MAX_IMAGE_PIXELS = None

# NOTE(review): the original script carried commented-out imports of project
# helpers (data_prepare.batch_rectify.rectify_single_img_fast and
# tools.img_tools).  The two resize helpers are defined locally below; the
# rectification helper is NOT, so it must be imported by the caller's project
# before a non-None `model` is passed to PDF().


def cv_resize_long_edge(cv_img, long_edge_length):
    """Resize an OpenCV image so that its longer edge equals *long_edge_length*.

    Args:
        cv_img: image array whose first two dimensions are (height, width).
        long_edge_length: target length, in pixels, of the longer edge.

    Returns:
        The input array itself when the longer edge already has the target
        length, otherwise a new array scaled uniformly with bilinear
        interpolation.
    """
    height, width = cv_img.shape[:2]
    max_len = max(width, height)
    if max_len == long_edge_length:
        return cv_img
    ratio = long_edge_length / max_len
    return cv2.resize(cv_img, None, fx=ratio, fy=ratio,
                      interpolation=cv2.INTER_LINEAR)


def pil_resize_long_edge(pil_img, long_edge_length):
    """Resize a PIL image so that its longer edge equals *long_edge_length*.

    Args:
        pil_img: PIL image to scale.
        long_edge_length: target length, in pixels, of the longer edge.

    Returns:
        The input image itself when the longer edge already has the target
        length, otherwise a new image scaled uniformly with Lanczos
        resampling.
    """
    width, height = pil_img.size
    max_len = max(width, height)
    if max_len == long_edge_length:
        return pil_img
    ratio = max_len / long_edge_length
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the filter it aliased.
    # Clamp to 1 so an extreme aspect ratio cannot round an edge down to 0.
    new_size = (max(1, round(width / ratio)), max(1, round(height / ratio)))
    return pil_img.resize(new_size, Image.LANCZOS)


class PDF:
    """In-memory PDF split into per-page OpenCV images.

    Each entry of ``self.pages`` is a dict with:
      * ``"ocv"``      - the page as a BGR OpenCV array (optionally rectified);
      * ``"img_path"`` - path of the saved JPEG, only when *save_img_dir* is set.

    The object supports ``len(pdf)`` and ``pdf[i]``, so it can also be
    iterated page by page.
    """

    # Pages whose longer edge exceeds this many pixels are scaled down to it.
    MAX_LONG_EDGE = 5000

    def __init__(self, pdf_bytes, model, dpi=300, n_threads=4,
                 save_img_dir="./tmp_ocr_dir", small_size=1280):
        """Render *pdf_bytes* to images and store one record per page.

        Args:
            pdf_bytes: raw bytes of the PDF document.
            model: orientation-classification model handed to
                ``rectify_single_img_fast``; ``None`` skips rectification.
            dpi: rendering resolution passed to ``convert_from_bytes``.
            n_threads: ``thread_count`` passed to ``convert_from_bytes``.
            save_img_dir: directory where each page is written as
                ``<index>.jpg``; ``None`` disables saving.
            small_size: accepted for interface compatibility; currently
                unused (the small-image variants are not generated).

        Raises:
            NameError: if *model* is given but ``rectify_single_img_fast``
                has not been imported into this module.
        """
        # Fail before the expensive PDF split rather than on the first page.
        rectify = globals().get("rectify_single_img_fast")
        if model is not None and rectify is None:
            raise NameError(
                "rectify_single_img_fast is not defined; import it from the "
                "project's rectification module before passing a model"
            )

        self.pages = []
        st = time.time()
        with tempfile.TemporaryDirectory() as tmp_out:
            pil_imgs = convert_from_bytes(pdf_bytes, output_folder=tmp_out,
                                          dpi=dpi, fmt="jpg",
                                          thread_count=n_threads)
            print("pdf拆分用時: 共 %d 頁用時: %.3fs" % (len(pil_imgs), time.time() - st))

            start = time.time()
            if save_img_dir is not None:
                os.makedirs(save_img_dir, exist_ok=True)

            # Pages are processed inside the context in case the returned
            # images are still backed by files in the temporary directory.
            for i, pil_img in enumerate(pil_imgs):
                page = {}
                # PIL delivers RGB; OpenCV expects BGR channel order.
                cv_img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

                # Cap the page size.
                if max(cv_img.shape[:2]) > self.MAX_LONG_EDGE:
                    cv_img = cv_resize_long_edge(cv_img, self.MAX_LONG_EDGE)

                if model is not None:
                    # Apply the orientation-classification model.
                    cv_img_org = rectify(model, cv_img)
                else:
                    cv_img_org = cv_img
                page["ocv"] = cv_img_org  # original-size OpenCV image

                self.pages.append(page)
                if save_img_dir is not None:
                    img_path = os.path.join(save_img_dir, "%d.jpg" % i)
                    page["img_path"] = img_path
                    cv2.imwrite(img_path, page["ocv"])

        print("pdf初始化及矯正: 共 %d 頁用時: %.3fs" % (len(self.pages), time.time() - start))

    def __getitem__(self, i):
        """Return the page record at index *i*."""
        return self.pages[i]

    def __len__(self):
        """Return the number of pages."""
        return len(self.pages)


def main():
    """Convert every PDF under ./me_pdf into JPEGs written to ./test_img."""
    tmp_dir = "./test_img"
    os.makedirs(tmp_dir, exist_ok=True)

    path = './me_pdf'
    for pdf_name in sorted(os.listdir(path)):
        pdf_file_path = os.path.join(path, pdf_name)
        with open(pdf_file_path, 'rb') as pdf_file:
            pdf_bin = pdf_file.read()
        pdf = PDF(pdf_bin, model=None, save_img_dir=None, small_size=2000)

        # Strip only the final extension so dotted names stay distinct.
        name = os.path.splitext(os.path.basename(pdf_file_path))[0]
        for page_idx, page in enumerate(pdf):
            out_path = os.path.join(tmp_dir, name + '_' + str(page_idx) + '.jpg')
            cv2.imwrite(out_path, page['ocv'])


if __name__ == '__main__':
    main()

# --- Article section 2: images to PDF (original heading: "2.圖片轉pdf") ---
from reportlab.lib.pagesizes import A4, portrait, landscape
from reportlab.pdfgen import canvas
import os
import cv2

# File extensions treated as images when scanning a directory.
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tif", ".tiff")


def convert_images_to_pdf(imgs_path, pdf_path, preserve_aspect_ratio=False):
    """Write every image in *imgs_path* to *pdf_path*, one image per A4 page.

    Images are taken in sorted path order and drawn from the page origin
    across the full portrait-A4 page.

    Args:
        imgs_path: directory containing the images.
        pdf_path: path of the PDF file to create.
        preserve_aspect_ratio: forwarded to ``drawImage``; the default
            ``False`` stretches each image to fill the page, as before.

    Returns:
        The number of image pages written.
    """
    page_w, page_h = portrait(A4)

    # Skip entries that are not images so a stray file cannot abort the run.
    img_paths = sorted(
        os.path.join(imgs_path, entry)
        for entry in os.listdir(imgs_path)
        if entry.lower().endswith(_IMAGE_EXTENSIONS)
    )

    pdf = canvas.Canvas(pdf_path, pagesize=(page_w, page_h))
    for img_path in img_paths:
        pdf.drawImage(img_path, 0, 0, page_w, page_h,
                      preserveAspectRatio=preserve_aspect_ratio)
        pdf.showPage()
    pdf.save()
    return len(img_paths)


def main():
    """Build one numbered PDF per sub-directory of the input folder."""
    path = './紅頭文件/身份證pdf'
    # Sort for deterministic numbering; ignore anything that is not a directory.
    dir_paths = sorted(
        os.path.join(path, entry) for entry in os.listdir(path)
    )
    dir_paths = [d for d in dir_paths if os.path.isdir(d)]
    for i, dir_path in enumerate(dir_paths):
        pdf_path = str(i + 1) + '.pdf'
        convert_images_to_pdf(dir_path, pdf_path)


if __name__ == '__main__':
    main()
總結
以上是生活随笔為你收集整理的pdf与图片互相转换的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: pyecharts地图使用
- 下一篇: 产品金字塔