生活随笔
收集整理的這篇文章主要介紹了「股市專輯播放列表」爬蟲的實現。
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
要求:
爬取網頁內所有播放列表
今日股市(王斌)
老曹說股
老丁說股
import scrapy
from scrapy import Request, signals
import pandas as pd
# NOTE(review): AlbumItem is imported but never referenced below —
# confirm with the project before removing ('album' is the local
# Scrapy project package, not on PyPI).
from album.items import AlbumItem
class ExampleSpider(scrapy.Spider):
    """Collect the track lists of several Ximalaya stock-market albums.

    For each configured album the spider walks the paginated
    ``getTracksList`` JSON API, accumulating ``{"author", "title"}``
    rows in ``self.data``; when the spider closes, the rows are written
    to an Excel workbook.
    """

    name = 'example'

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.data = []      # accumulated {"author", "title"} rows
        self.totalpage = 2  # placeholder; set from each album's first page
        # NOTE(review): '老曹說股' and '老丁說股' share albumId 4282711 —
        # looks like a copy/paste slip; confirm the real id for '老丁說股'.
        self.user = [
            {'albumId': 2881558, 'albumN': '今日股市(王斌)'},
            {'albumId': 4282711, 'albumN': '老曹說股'},
            {'albumId': 4282711, 'albumN': '老丁說股'},
        ]

    def start_requests(self):
        """Yield the first-page request for every configured album."""
        for user in self.user:
            # Copy so the 'page' bookkeeping key does not mutate the
            # shared self.user config entries (the original aliased them).
            dic = dict(user)
            self.logger.info(f'dic{dic}')
            dic['page'] = 1
            url = (
                "https://www.ximalaya.com/revision/album/v1/getTracksList"
                f"?albumId={dic['albumId']}&pageNum=1"
            )
            yield Request(url, callback=self.parse, cb_kwargs=dic)

    def parse(self, response, **kwargs):
        """Collect one result page; from page 1, fan out the rest.

        ``kwargs`` carries albumId / albumN / page via ``cb_kwargs``.
        """
        payload = response.json()["data"]
        if kwargs["page"] == 1:
            # NOTE(review): "pageSize" is normally items-per-page, not a
            # page count — verify against the API response; a total-count
            # field divided by pageSize may be the correct page count.
            self.totalpage = payload["pageSize"]
        for track in payload["tracks"]:
            self.data.append({
                "author": kwargs["albumN"],
                "title": track["title"],
            })
        # Only the first page schedules follow-ups.  The original body
        # re-issued the whole range from every page (dupe-filtered away)
        # and, worse, mutated ONE kwargs dict shared by every scheduled
        # request, so all responses saw the last loop value of 'page'.
        # Each follow-up now gets its own cb_kwargs dict.
        if kwargs["page"] == 1:
            for page in range(2, self.totalpage):
                follow = dict(kwargs, page=page)
                url = (
                    "https://www.ximalaya.com/revision/album/v1/getTracksList"
                    f'?albumId={follow["albumId"]}&pageNum={page}'
                )
                yield Request(url=url, callback=self.parse, cb_kwargs=follow)

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Build the spider and connect spider_closed to the closed signal."""
        spider = cls(**kwargs)
        spider._set_crawler(crawler)
        crawler.signals.connect(spider.spider_closed,
                                signal=signals.spider_closed)
        return spider

    def spider_closed(self, spider):
        """On shutdown, export everything in self.data to an Excel file."""
        print(f'self.data{self.data}')
        self.logger.info(f'一共 {len(self.data)} 條數據')
        output = '喜馬拉雅.xlsx'
        with pd.ExcelWriter(output) as writer:
            pd.DataFrame(self.data).to_excel(writer, sheet_name='data',
                                             index=False)
        spider.logger.info('生成報表成功: %s', output)
xpath的高級用法
總結
以上是生活随笔為你收集整理的股市专辑播放列表的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。