bilibili: downloading all submissions of a given UP主
1 The download module:
#!/usr/bin/env python
import requests
import json
import time
import re
import hashlib
import os
from get_headers import get_ua  # my own User-Agent pool


# generic download helper
def get_content(url, refer=None, params=None):
    headers = {'User-Agent': get_ua(), 'Referer': url, 'Connection': 'keep-alive'}
    if refer:
        headers['Referer'] = refer
    try:
        response = requests.get(url, headers=headers, params=params)
        time.sleep(1)
        status_code = response.status_code
        if status_code == 200 or status_code == 302:
            return response
        else:
            print('___status code is %d ___' % status_code)
            exit()
    except Exception:
        print('___get_content is failed___')
        exit()


class DownloadByUrl(object):
    '''Download and save a single video given its page URL; the other API moves live in other classes.'''
    # only handles ordinary videos by URL, not bangumi
    api_url = 'http://interface.bilibili.com/playurl?'
    fmt2qlt = dict(hdflv=4, flv=3, hdmp4=2, mp4=1)
    SEC1 = '1c15888dc316e05a15fdd0a02ed6584f'
    url = ''
    dan_base = 'https://comment.bilibili.com/{}.xml'

    def __init__(self, url=None):
        print('___a bilibili project created____')

    def make_xml_url(self, response):
        cid = re.findall(r'cid=(\d+)', response.text)[0]
        aid = re.findall(r'aid=(\d+)', response.text)[0]
        ts = int(time.time())
        for fmt in self.fmt2qlt:  # auto-choose the highest quality available
            quality = self.fmt2qlt[fmt]
            params_str = 'cid={}&player=1&quality={}&ts={}'.format(cid, quality, ts)
            sign = hashlib.md5((params_str + self.SEC1).encode('utf-8')).hexdigest()
            params = 'cid={}&player=1&quality={}&ts={}&sign={}'.format(cid, quality, ts, sign)
            xml_url = self.api_url + params
            # the xml_url is ready; fetch it and parse out the real download url
            xml_response = get_content(xml_url)
            if len(xml_response.text) < 250:
                # a response shorter than ~250 bytes usually means the url was built wrong or does not exist
                print('this is wrong ++++')
            else:
                self.parse(xml_response, quality)
                break

    def parse(self, xml_response, quality):
        length = re.findall(r'<timelength>(\d+)</timelength>', xml_response.text)
        cid = ''.join(re.findall(r'av(\d+)', self.url))
        res_url = re.findall(r'<url>(.*?)</url>', xml_response.text)[0]
        real_url = 'http://' + ''.join(re.findall(r'\[http://(.*?)\]', res_url))
        # work out the container format of the file we are about to download
        if 'hdflv' in real_url:
            contain = 'hdflv'
        elif 'hdmp4' in real_url:
            contain = 'hdmp4'
        elif 'flv' in real_url:
            contain = 'flv'
        else:
            contain = 'mp4'
        print(length)
        # even a one-hour video still comes back as a single url, so the first match is enough
        self.test_download(real_url, cid, contain)

    def test_download(self, url, cid, contain):
        # dry run: to go easy on the server, just print the parameters we would use
        target_path = os.path.join('/home/zy/file/spider/txt/', cid + '.' + contain)
        print('url = {}, cid = {},\n qlt = {},\n target_path = {}'.format(url, cid, contain, target_path))
        print('next is download')

    def download(self, url, cid, contain):
        # the real download function
        target_path = os.path.join('/home/zy/file/spider/txt/', cid + '.' + contain)
        print(target_path)
        response = get_content(url, refer=self.url)
        with open(target_path, 'wb') as file:
            file.write(response.content)

    def get_danmu(self, response):
        # the danmaku api follows the dan_base pattern
        cid = ''.join(re.findall(r'cid=(\d+)', response.text))
        danmu_url = self.dan_base.format(cid)
        return danmu_url

    def download_by_url(self, start_url):
        # entry point: fetch the video page, then build and parse the api url
        self.url = start_url
        response = get_content(start_url)
        self.make_xml_url(response)
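For a single video, the class above is driven through download_by_url. A minimal usage sketch (the av link is the test video mentioned at the end of the post):

# minimal usage sketch: dry-run / download one ordinary video by its page URL
downloader = DownloadByUrl()
downloader.download_by_url('https://www.bilibili.com/video/av17102034/')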
Two more classes are needed on top of this: one fetches the URLs of all videos the UP has submitted, and the other determines exactly how many there are and the av address of each video.
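The original post does not include the source of these two classes. Judging from the main function below they are called DownloadByUp and JudgeNum, with up_video_info returning a list of video page URLs and judge_url_page returning one URL per part of a submission. A rough interface sketch under those assumptions, reusing get_content, re and json from the module above; the space.bilibili.com list endpoint and the ?p= multi-part URL scheme are my guesses, not the author's code:

# rough sketch only: the real DownloadByUp / JudgeNum are not shown in the post,
# these bodies just illustrate the interfaces the main function relies on.
class DownloadByUp(object):
    # hypothetical: an old space.bilibili.com JSON api that lists an UP's submissions
    list_api = 'https://space.bilibili.com/ajax/member/getSubmitVideos'

    def up_video_info(self, url, page=1):
        '''Return the page URLs of the videos the UP has submitted.'''
        mid = ''.join(re.findall(r'space\.bilibili\.com/(\d+)', url))
        resp = get_content(self.list_api, params={'mid': mid, 'pagesize': 30, 'page': page})
        vlist = json.loads(resp.text).get('data', {}).get('vlist', [])
        return ['https://www.bilibili.com/video/av{}/'.format(v['aid']) for v in vlist]


class JudgeNum(object):
    def judge_url_page(self, url):
        '''Return one URL per part of a submission (a single-part video gives a 1-element list).'''
        response = get_content(url)
        parts = re.findall(r'"page":(\d+)', response.text)  # guessed marker for multi-part videos
        total = max(len(set(parts)), 1)
        if total == 1:
            return [url]
        return ['{}?p={}'.format(url.rstrip('/'), i) for i in range(1, total + 1)]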
Finally, the main function:
def main_by_up(url, page=1):
    dbu = DownloadByUp()
    jn = JudgeNum()
    db = DownloadByUrl()
    up_video = dbu.up_video_info(url, page=page)
    print('there are {} videos'.format(len(up_video)))
    for i in up_video:
        pages = jn.judge_url_page(i)
        if len(pages) == 1:
            print('___________total 1 page____________')
            db.download_by_url(i)
        else:
            print('+++++++++++total {} pages+++++++++++++'.format(len(pages)))
            for j in pages:
                db.download_by_url(j)
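A hypothetical way to kick the whole thing off, assuming up_video_info is fed the UP's space page; the space URL below is made up for illustration and is not from the post:

# hypothetical entry point; the UP's space URL (mid 123456) is only an example
if __name__ == '__main__':
    main_by_up('https://space.bilibili.com/123456', page=1)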
The result is as follows; the target UP used for testing was: https://www.bilibili.com/video/av17102034/?from=search&seid=15833708722644867014
Summary
That is everything on downloading all of an UP's submissions from bilibili; hopefully it helps you solve the problem you ran into.