bilibili: downloading all submissions of a given UP主
1 The download module:
#!/usr/bin/env python
import requests
import json
import time
import re
import hashlib
import os
from get_headers import get_ua  # my own User-Agent pool


# generic download helper
def get_content(url, refer=None, params=None):
    headers = {'User-Agent': get_ua(), 'Referer': url, 'Connection': 'keep-alive'}
    if refer:
        headers['Referer'] = refer
    try:
        response = requests.get(url, headers=headers, params=params)
        time.sleep(1)
        status_code = response.status_code
        if status_code == 200 or status_code == 302:
            return response
        else:
            print('___status code is %d ___' % status_code)
            exit()
    except Exception:
        print('___get_content is failed___')
        exit()


class DownloadByUrl(object):
    '''Download and save a single video given its page URL; the other API moves live in other classes.'''
    # only handles ordinary videos by URL, not bangumi
    api_url = 'http://interface.bilibili.com/playurl?'
    fmt2qlt = dict(hdflv=4, flv=3, hdmp4=2, mp4=1)
    SEC1 = '1c15888dc316e05a15fdd0a02ed6584f'
    url = ''
    dan_base = 'https://comment.bilibili.com/{}.xml'

    def __init__(self, url=None):
        print('___a bilibili project created____')

    def make_xml_url(self, response):
        cid = re.findall(r'cid=(\d+)', response.text)[0]
        aid = re.findall(r'aid=(\d+)', response.text)[0]
        ts = int(time.time())
        for fmt in self.fmt2qlt:  # auto-choose the highest quality available
            quality = self.fmt2qlt[fmt]
            params_str = 'cid={}&player=1&quality={}&ts={}'.format(cid, quality, ts)
            sign = hashlib.md5((params_str + self.SEC1).encode('utf-8')).hexdigest()
            params = 'cid={}&player=1&quality={}&ts={}&sign={}'.format(cid, quality, ts, sign)
            xml_url = self.api_url + params
            # the xml_url is ready; fetch it and parse out the real download url
            xml_response = get_content(xml_url)
            if len(xml_response.text) < 250:
                # a response shorter than ~250 bytes usually means the url was built wrong or does not exist
                print('this is wrong ++++')
            else:
                self.parse(xml_response, quality)
                break

    def parse(self, xml_response, quality):
        length = re.findall(r'<timelength>(\d+)</timelength>', xml_response.text)
        cid = ''.join(re.findall(r'av(\d+)', self.url))
        res_url = re.findall(r'<url>(.*?)</url>', xml_response.text)[0]
        real_url = 'http://' + ''.join(re.findall(r'\[http://(.*?)\]', res_url))
        # work out the container format of the file we are about to download
        if 'hdflv' in real_url:
            contain = 'hdflv'
        elif 'hdmp4' in real_url:
            contain = 'hdmp4'
        elif 'flv' in real_url:
            contain = 'flv'
        else:
            contain = 'mp4'
        print(length)
        # even a one-hour video still comes back as a single url, so the first match is enough
        self.test_download(real_url, cid, contain)

    def test_download(self, url, cid, contain):
        # dry run: to go easy on the server, just print the parameters we would use
        target_path = os.path.join('/home/zy/file/spider/txt/', cid + '.' + contain)
        print('url = {}, cid = {},\n qlt = {},\n target_path = {}'.format(url, cid, contain, target_path))
        print('next is download')

    def download(self, url, cid, contain):
        # the real download function
        target_path = os.path.join('/home/zy/file/spider/txt/', cid + '.' + contain)
        print(target_path)
        response = get_content(url, refer=self.url)
        with open(target_path, 'wb') as file:
            file.write(response.content)

    def get_danmu(self, response):
        # the danmaku api follows the dan_base pattern
        cid = ''.join(re.findall(r'cid=(\d+)', response.text))
        danmu_url = self.dan_base.format(cid)
        return danmu_url

    def download_by_url(self, start_url):
        # entry point: fetch the video page, then build and parse the api url
        self.url = start_url
        response = get_content(start_url)
        self.make_xml_url(response)
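For a single video, the class above is driven through download_by_url. A minimal usage sketch (the av link is the test video mentioned at the end of the post):

# minimal usage sketch: dry-run / download one ordinary video by its page URL
downloader = DownloadByUrl()
downloader.download_by_url('https://www.bilibili.com/video/av17102034/')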
Two more classes are needed on top of this: one fetches the URLs of all videos the UP has submitted, and the other determines exactly how many there are and the av address of each video.
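The original post does not include the source of these two classes. Judging from the main function below they are called DownloadByUp and JudgeNum, with up_video_info returning a list of video page URLs and judge_url_page returning one URL per part of a submission. A rough interface sketch under those assumptions, reusing get_content, re and json from the module above; the space.bilibili.com list endpoint and the ?p= multi-part URL scheme are my guesses, not the author's code:

# rough sketch only: the real DownloadByUp / JudgeNum are not shown in the post,
# these bodies just illustrate the interfaces the main function relies on.
class DownloadByUp(object):
    # hypothetical: an old space.bilibili.com JSON api that lists an UP's submissions
    list_api = 'https://space.bilibili.com/ajax/member/getSubmitVideos'

    def up_video_info(self, url, page=1):
        '''Return the page URLs of the videos the UP has submitted.'''
        mid = ''.join(re.findall(r'space\.bilibili\.com/(\d+)', url))
        resp = get_content(self.list_api, params={'mid': mid, 'pagesize': 30, 'page': page})
        vlist = json.loads(resp.text).get('data', {}).get('vlist', [])
        return ['https://www.bilibili.com/video/av{}/'.format(v['aid']) for v in vlist]


class JudgeNum(object):
    def judge_url_page(self, url):
        '''Return one URL per part of a submission (a single-part video gives a 1-element list).'''
        response = get_content(url)
        parts = re.findall(r'"page":(\d+)', response.text)  # guessed marker for multi-part videos
        total = max(len(set(parts)), 1)
        if total == 1:
            return [url]
        return ['{}?p={}'.format(url.rstrip('/'), i) for i in range(1, total + 1)]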
Finally, the main function:
def main_by_up(url, page=1):
    dbu = DownloadByUp()
    jn = JudgeNum()
    db = DownloadByUrl()
    up_video = dbu.up_video_info(url, page=page)
    print('there are {} videos'.format(len(up_video)))
    for i in up_video:
        pages = jn.judge_url_page(i)
        if len(pages) == 1:
            print('___________total 1 page____________')
            db.download_by_url(i)
        else:
            print('+++++++++++total {} pages+++++++++++++'.format(len(pages)))
            for j in pages:
                db.download_by_url(j)
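A hypothetical way to kick the whole thing off, assuming up_video_info is fed the UP's space page; the space URL below is made up for illustration and is not from the post:

# hypothetical entry point; the UP's space URL (mid 123456) is only an example
if __name__ == '__main__':
    main_by_up('https://space.bilibili.com/123456', page=1)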
The result is as follows; the target UP used for testing was: https://www.bilibili.com/video/av17102034/?from=search&seid=15833708722644867014
Summary
That is everything on downloading all of an UP's submissions from bilibili; hopefully it helps you solve the problem you ran into.