Python keyword mining: an essential SEO tool for scraping keyword-mining data
The data is scraped from Chinaz (站长之家) webmaster tools. The script prompts you for a keyword and mines related-keyword data for it.
Libraries used:
import requests
from lxml import etree
import re
import xlwt
import time
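Note that requests, lxml, and xlwt are third-party packages; if they are not already installed, pip install requests lxml xlwt will pull them in (re and time ship with Python).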
What the example covers:
1. Scraping the result list with lxml's etree
2. Splitting the interleaved metric columns with step slicing (see the sketch below)
3. Saving the results as an Excel file
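Point 2 is the heart of the parsing step, so here is a minimal standalone sketch of how a step slice like cells[n::5] de-interleaves the flat list of metric cells. The values are made up for illustration, not real Chinaz numbers:

# Standalone illustration of the step slicing used later in get_keyword_datas():
# five metrics per keyword arrive as one flat list, interleaved row by row.
cells = ['120', '80', '40', '15', '3210000',      # row 1: overall, pc, mobile, 360, indexed
         '95',  '60', '35', '10', '1870000']      # row 2
overall = cells[0::5]   # ['120', '95']
pc      = cells[1::5]   # ['80', '60']
mobile  = cells[2::5]   # ['40', '35']
s360    = cells[3::5]   # ['15', '10']
indexed = cells[4::5]   # ['3210000', '1870000']
print(list(zip(overall, pc, mobile, s360, indexed)))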
# -*- coding: utf-8 -*-
# Chinaz webmaster-tools keyword mining
import requests
from lxml import etree
import re
import xlwt
import time

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10'
}


# Check whether the keyword returns any related keywords at all
def search_keyword(keyword):
    data = {
        'kw': keyword,
        'page': '1',
        'by': '0',
    }
    url = "http://stool.chinaz.com/baidu/words.aspx"
    html = requests.post(url, data=data, headers=headers).text
    time.sleep(3)
    con = etree.HTML(html)
    # The red banner div only appears when nothing was found
    key_result = con.xpath('//div[@class="col-red lh30 fz14 tc"]/text()')
    try:
        key_result = key_result[0]  # banner text "没有找到相关的关键字" (no related keywords found)
    except IndexError:
        key_result = []
    return key_result
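# Note (based on the page structure targeted above): when the keyword has
# results, the red banner div is absent and search_keyword() returns an empty
# list []; when it has none, it returns the banner text itself, which the
# __main__ block at the bottom compares against the literal "没有找到相关的关键字".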
# Get the number of result pages and the record count for the keyword
def get_page_number(keyword):
    data = {
        'kw': keyword,
        'page': '1',
        'by': '0',
    }
    url = "http://stool.chinaz.com/baidu/words.aspx"
    html = requests.post(url, data=data, headers=headers).text
    time.sleep(3)
    con = etree.HTML(html)
    page_num = con.xpath('//span[@class="col-gray02"]/text()')
    page_numberze = r'共(.+?)页'
    page_number = re.findall(page_numberze, page_num[0], re.S)
    page_number = page_number[0]
    total_data = con.xpath('//p[@class="col-gray lh24 fr pr5"]')  # record-count element
    total_datas = total_data[0].xpath('string(.)')  # all text inside the node
    print(f'Mining keyword: {keyword} - {total_datas}')
    return page_number
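# Quick illustration of the regex above with a hypothetical pager string: if
# the span text were, say, "共5页,到第", then re.findall(r'共(.+?)页', ...)
# yields ['5'], so the function returns the string '5' (cast to int in __main__).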
# Collect the keyword-mining data, page by page
def get_keyword_datas(keyword, page_number):
    datas_list = []
    for i in range(1, page_number + 1):
        print(f'Collecting keyword mining data from page {i}...')
        data = {
            'kw': keyword,
            'page': i,
            'by': '0',
        }
        url = "http://stool.chinaz.com/baidu/words.aspx"
        html = requests.post(url, data=data, headers=headers).text
        time.sleep(3)
        con = etree.HTML(html)
        key_words = con.xpath('//p[@class="midImg"]/a/span/text()')  # keywords
        # The five numeric columns come back as one flat list of <a> texts,
        # interleaved row by row, so step slicing splits them into columns.
        keyword_all_datas = []
        keyword_datas = con.xpath('//ul[@class="ResultListWrap "]/li/div[@class="w8-0"]/a')
        for keyword_data in keyword_datas:
            keyword_data = keyword_data.text
            if keyword_data is not None:
                keyword_all_datas.append(keyword_data)
        overall_indexs = keyword_all_datas[0::5]   # overall index
        pc_indexs = keyword_all_datas[1::5]        # PC index
        mobile_indexs = keyword_all_datas[2::5]    # mobile index
        s360_indexs = keyword_all_datas[3::5]      # 360 index
        collections = keyword_all_datas[4::5]      # indexed pages
        ips = con.xpath('//ul[@class="ResultListWrap "]/li/div[@class="w15-0 kwtop"]/text()')  # estimated traffic
        if ips == []:
            # pad so zip() below still yields one row per keyword
            ips = ['--'] * len(key_words)
        first_place_hrefs = con.xpath(
            '//ul[@class="ResultListWrap "]/li/div[@class="w18-0 lh24 tl"]/a/text()')  # top-ranking URL
        if first_place_hrefs == []:
            first_place_hrefs = con.xpath('//ul[@class="ResultListWrap "]/li/div[@class="w18-0 lh24 tl"]/text()')
        first_place_titles = con.xpath(
            '//ul[@class="ResultListWrap "]/li/div[@class="w18-0 lh24 tl"]/p[@class="lh17 pb5"]/text()')  # top-ranking title
        if first_place_titles == []:
            first_place_titles = ['--'] * len(key_words)
        data_list = []
        for key_word, overall_index, pc_index, mobile_index, s360_index, collection, ip, first_place_href, first_place_title in zip(
                key_words, overall_indexs, pc_indexs, mobile_indexs, s360_indexs, collections, ips,
                first_place_hrefs, first_place_titles):
            data = [
                key_word,
                overall_index,
                pc_index,
                mobile_index,
                s360_index,
                collection,
                ip,
                first_place_href,
                first_place_title,
            ]
            print(data)
            print('\n')
            data_list.append(data)
        time.sleep(3)
        datas_list.extend(data_list)  # merge this page's rows into the overall result
    return datas_list
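# Caveat on the assembly step above: zip() stops at the shortest of the nine
# column lists, so if any column (e.g. first_place_hrefs) comes back shorter
# than key_words for a page, the trailing rows of that page are silently dropped.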
# Save the keyword data as an Excel (.xls) file
def bcsj(keyword, data):
    workbook = xlwt.Workbook(encoding='utf-8')
    booksheet = workbook.add_sheet('Sheet 1', cell_overwrite_ok=True)
    # Header row, in the same order as the rows assembled in get_keyword_datas()
    title = [['Keyword', 'Overall index', 'PC index', 'Mobile index', '360 index',
              'Indexed pages', 'Estimated traffic (IP)', 'Top-ranking URL', 'Top-ranking title']]
    title.extend(data)
    for i, row in enumerate(title):
        for j, col in enumerate(row):
            booksheet.write(i, j, col)
    workbook.save(f'{keyword}.xls')
    print(f'Saved keyword data to {keyword}.xls!')


if __name__ == '__main__':
    keyword = input('Enter a keyword >> ')
    print('Querying, please wait...')
    result = search_keyword(keyword)
    if result == "没有找到相关的关键字":  # banner text: no related keywords found
        print('\n')
        print(result)
        print('No keyword mining data was found for this keyword')
    else:
        print('\n')
        page_number = get_page_number(keyword)
        print('\n')
        print('Collecting keyword mining data, please wait...')
        print('\n')
        page_number = int(page_number)
        datas_list = get_keyword_datas(keyword, page_number)
        print('\n')
        print('Keyword mining results:')
        print('======================== results ========================\n\n')
        for datas in datas_list:
            print(datas)
        print('\n\n======================== done ========================\n')
        bcsj(keyword, datas_list)
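xlwt writes the legacy .xls format. If a plain spreadsheet-importable file is enough, the same rows can be written with just the standard library. A minimal sketch under that assumption; save_csv is a hypothetical helper, not part of the original script, and takes the same datas_list built above:

import csv

def save_csv(keyword, rows):
    # Write the header plus the collected rows to <keyword>.csv
    # (UTF-8 with BOM so Excel opens it without mangling the text).
    header = ['Keyword', 'Overall index', 'PC index', 'Mobile index', '360 index',
              'Indexed pages', 'Estimated traffic (IP)', 'Top-ranking URL', 'Top-ranking title']
    with open(f'{keyword}.csv', 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(rows)

# e.g. save_csv(keyword, datas_list) in place of bcsj(keyword, datas_list)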
Summary
The script POSTs a keyword to Chinaz's keyword-mining tool, walks every result page with lxml, splits the interleaved metric cells by step slicing, and writes the rows to <keyword>.xls with xlwt.