python异步协程,抓取豆瓣top250电影海报。
生活随笔收集整理的這篇文章主要介紹了《python异步协程,抓取豆瓣top250电影海报》,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
# 利用正則及異步協程 爬取豆瓣top250電影圖片
# author:xyislove
# 歡迎各位大神修改指導
from asyncio import tasks
import re
from fake_useragent import UserAgent
import os
import asyncio
import aiofiles
import aiohttp############分頁獲取圖片鏈接#############################################
# After each "<li>", lazily captures the first src="..." value that is
# immediately followed by ' class='. re.S lets ".*?" span line breaks.
# Compiled once here instead of on every get_pic() call.
_PIC_LINK_RE = re.compile(r'<li>.*?src="(?P<link>.*?)" class=', re.S)


async def get_pic(url, headers, pic_url, name):
    """Fetch one listing page and collect the image links found in it.

    Args:
        url: Address of the page to fetch.
        headers: HTTP headers sent with the request.
        pic_url: List that every matched link is appended to (mutated in place).
        name: Label printed once the page has been processed.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as resp:
            pagecontent = await resp.text()
    # The connection is released before parsing; the text is already in memory.
    for match in _PIC_LINK_RE.finditer(pagecontent):
        link = match.group('link')
        print(link)
        pic_url.append(link)
    print(f'{name}完成!')


async def aio_download():
    """Fetch the ten Top 250 listing pages concurrently and gather their links.

    Returns:
        A flat list of image links ordered by page (start=0, 25, ..., 225),
        regardless of which request finished first.
    """
    ua = UserAgent()  # built once; only the random pick happens per page
    per_page_links = []
    jobs = []
    for page_no, i in enumerate(range(0, 250, 25), start=1):
        print(i)
        headers = {'User-Agent': ua.random}
        url = f'https://movie.douban.com/top250?start={i}&filter='
        name = f'第{page_no}頁'
        # Each page writes into its own list so that concurrent completion
        # order cannot scramble the final ordering.
        links = []
        per_page_links.append(links)
        jobs.append(asyncio.create_task(get_pic(url, headers, links, name)))

    # gather() (unlike wait()) hands back each task's exception, so a failed
    # page is reported instead of disappearing silently.
    outcomes = await asyncio.gather(*jobs, return_exceptions=True)
    for page_no, outcome in enumerate(outcomes, start=1):
        if isinstance(outcome, BaseException):
            print(f'page {page_no} failed: {outcome!r}')

    return [link for links in per_page_links for link in links]
##############分頁獲取圖片鏈接##結束#######################################################
##############異步協程下載圖片########################
async def pic_download(i, session, headers, name):
    """Download one image and store it as ``./pic/<name>.jpg``.

    Args:
        i: URL of the image.
        session: Open ``aiohttp.ClientSession`` used for the request.
        headers: HTTP headers sent with the request.
        name: File name (without extension) for the saved image.

    Raises:
        aiohttp.ClientResponseError: If the response status is 400 or higher.
    """
    async with session.get(i, headers=headers) as resp:
        # Refuse to save an error page under a .jpg name.
        resp.raise_for_status()
        payload = await resp.read()
    # The file is opened only after the body arrived, so a failed request
    # does not leave an empty file behind. Both context managers close their
    # resource on exit; no explicit close() calls are needed.
    async with aiofiles.open(f'./pic/{name}.jpg', mode='wb') as f:
        await f.write(payload)


async def aio_download2(pic):
    """Download every image URL in ``pic`` concurrently into ``./pic``.

    Files are named after the 1-based position of the URL in ``pic``
    (``1.jpg``, ``2.jpg``, ...). Blank entries are skipped but still occupy
    their position in the numbering.

    Args:
        pic: Iterable of image URLs; surrounding whitespace (such as the
            trailing newline of a line read from a file) is stripped.
    """
    jobs = [
        (position, entry.strip())
        for position, entry in enumerate(pic, start=1)
        if entry.strip()
    ]
    if not jobs:
        # Nothing to download; asyncio.wait() used to raise ValueError here.
        return

    os.makedirs('./pic', exist_ok=True)
    headers = {'User-Agent': UserAgent().random}
    async with aiohttp.ClientSession() as session:
        outcomes = await asyncio.gather(
            *(
                pic_download(link, session, headers, position)
                for position, link in jobs
            ),
            return_exceptions=True,
        )

    # Report failed downloads instead of dropping their exceptions.
    for (position, link), outcome in zip(jobs, outcomes):
        if isinstance(outcome, BaseException):
            print(f'{position}.jpg failed ({link}): {outcome!r}')
##############結束#############################
# 主函數####
def main():
    """Collect the image links, save them to ``pic-url.txt``, then download them."""
    # Stage 1: scrape the links and write them out, one per line.
    # Mode 'w' replaces the file, so repeated runs do not pile up duplicates.
    pic_url = asyncio.run(aio_download())
    with open('pic-url.txt', mode='w', encoding='utf-8') as f:
        f.writelines(f'{link.strip()}\n' for link in pic_url)

    # Stage 2: read the saved links back and download the images.
    with open('pic-url.txt', mode='r', encoding='utf-8') as f:
        pic = list(f)
    if pic:
        # Skipped when no link was scraped, so the download stage is never
        # started with an empty task list.
        asyncio.run(aio_download2(pic))


if __name__ == '__main__':
    main()
總結
以上是生活随笔為你收集整理的《python异步协程,抓取豆瓣top250电影海报》的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 央视曝光危险wifi
- 下一篇: CodeForces 670D2 Mag