生活随笔
收集整理的這篇文章主要介紹了Python爬取某宝宝商品评论等数据,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
提前準備參數
import re
import requests
import random
import time
import pandas as pd

# Module-level accumulator of scraped rows. get_item() appends to this list,
# so rows persist (and accumulate) across calls within one interpreter session.
df = []

# HTTP headers sent with every request.
# NOTE(review): 'cookie' is empty here; presumably a logged-in session cookie
# has to be pasted in before the endpoint returns data -- confirm.
# NOTE(review): the item id in 'referer' (539339147058) differs from the
# 'itemId' request parameter (556732726926) -- confirm which one is intended.
headers = {
    'cookie': '',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 Edg/98.0.1108.62',
    'referer': 'https://detail.tmall.com/item.htm?id=539339147058&price=99&sourceType=item&sourceType=item&suid=e9ca3123-b90d-4ed5-b31f-08f696654755&ut_sk=1.XVltFfIJHZADAP26 uLU46qu_21646297_1646213190055.Copy.ShareGlobalNavigation_1&un=bf83687cd45b2be19a2d98b7feb65231&share_crt_v=1&un_site=0&spm=a2159r.13376460.0.0&tbSocialPopKey=shareItem&sp_tk=Q0c0ZTI0cnRhZHM=&cpp=1&shareurl=true&short_name=h.fMQsL31&bxsign=scdLssu8BjsPSMY422ksv9WiDQBjg3Ih_OKP8X8cJgTl5W3pRkm74cPZeKXptUmWhTSFuRQNlnWc7SUD1w2I3VdhjkFDjEAGAre3x1CMDXKBsh4MyL5hqQLpo6LhUFDB22U&sm=3d25d1?tk=CG4e24rtads&app=chrome&skuId=4571541867149',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
}

# Endpoint queried for the paginated review list.
url = 'https://rate.tmall.com/list_detail_rate.htm'


def _build_params(page, now=None):
    """Build the query parameters for one page of the review list.

    Args:
        page: Page number sent as ``currentPage``.
        now: Timestamp in seconds; defaults to ``time.time()``.

    Returns:
        dict of query parameters. ``_ksTS`` is
        ``<seconds><3 fractional digits>_<next 4 fractional digits>`` and
        ``callback`` is ``'jsonp'`` followed by those 4 digits plus one.
    """
    if now is None:
        now = time.time()
    # Format with a fixed number of fractional digits. Splitting str(float)
    # yields a variable-length fraction, and a fraction of three digits or
    # fewer left an empty tail that crashed int('').
    seconds, fraction = f"{now:.7f}".split(".")
    millis, tail = fraction[:3], fraction[3:]
    return {
        'itemId': 556732726926,
        'spuId': 866309580,
        'sellerId': 725677994,
        'order': 3,
        'currentPage': page,
        'append': 0,
        'content': 1,
        "callback": 'jsonp' + str(int(tail) + 1),
        "_ksTS": seconds + millis + "_" + tail,
    }


def get_html(url, header, page):
    """Fetch one page of the review list and return the raw response text.

    Args:
        url: Endpoint to request.
        header: Mapping of HTTP headers to send.
        page: Page number to request.

    Returns:
        The response body as text when the status code is 200, otherwise
        ``None``.

    Raises:
        requests.RequestException: On connection errors or timeout.
    """
    # A timeout keeps a stalled connection from blocking the run forever.
    r = requests.get(url, headers=header, params=_build_params(page), timeout=10)
    if r.status_code == 200:
        return r.text
    return None


def get_item(num):
    """Scrape review pages and write them to ``taobao_items.csv``.

    Pages ``1`` to ``num - 1`` are requested (``range(1, num)``), so ``num=5``
    fetches four pages. Extracted rows are appended to the module-level
    ``df`` list, and the whole list is then written to the CSV file, which is
    overwritten on every call.

    Args:
        num: Exclusive upper bound of the page numbers to fetch.
    """
    for page in range(1, num):
        try:
            text = get_html(url, headers, page)
        except requests.RequestException:
            print("Nothing you catch")
            continue
        if text is None:
            # Non-200 response: nothing to parse for this page.
            print("Nothing you catch")
            continue

        user_name = re.findall('"displayUserNick":"(.*?)"', text)
        item_type = re.findall('"auctionSku":"(.*?)"', text)
        rate_content = re.findall('"rateContent":"(.*?)"', text)
        rate_date = re.findall('"rateDate":"(.*?)"', text)
        # Pair the fields page by page so that a page with unequal match
        # counts cannot shift the columns of later pages or raise IndexError.
        df.extend(
            [name, date, sku, content]
            for name, date, sku, content
            in zip(user_name, rate_date, item_type, rate_content)
        )

        print(f"第{page}頁爬取完畢")
        # Random pause between successful requests.
        time.sleep(random.randint(3, 9))

    df1 = pd.DataFrame(
        df, columns=['user_name', 'rate_date', 'item_type', 'rate_content']
    )
    df1.to_csv('taobao_items.csv', index=False, encoding='utf-8')


if __name__ == '__main__':
    num = 5
    get_item(num)
總結
以上是生活随笔為你收集整理的Python爬取某宝宝商品评论等数据的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。