黄河水文数据
# -*- coding:utf-8 -*-
# 僅需修改這個地方https://jn.lianjia.com/ershoufang/pg{}rs/ 將jn換成你所在城市的拼寫首字母小寫
import requests
from lxml import etree
import time
import random
import csv
import requests
import json


class LianjiaSpider(object):
    """Fetch a page several times and pull records out of its JSON payload.

    NOTE(review): the class name, the target URL and the record fields read
    in ``parse_page`` appear to come from different scrapers (the field names
    look like a sales-board listing) -- confirm the intended endpoint before
    relying on this.
    """

    # Keys read from every record of the decoded payload. A record lacking
    # any of them raises ``KeyError``.
    _FIELDS = (
        "id",
        "name",
        "weekSaleCount",
        "score",
        "avgPrice",
        "cateName",
        "areaName",
        "distance",
        "rank",
        "frontImg",
        "oneSentence",
    )

    def __init__(self):
        """Store the target URL and the request headers."""
        self.url = "http://61.163.88.227:8006/hwsq.aspx"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1"}

    def get_page(self, url, i, j, timeout=10):
        """Download ``url`` and hand the response text to ``parse_page``.

        Args:
            url: Address to request.
            i: First loop value from ``main``; only forwarded to ``parse_page``.
            j: Second loop value from ``main``; only forwarded to ``parse_page``.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout a stalled connection would block the run forever.

        Returns:
            The list of records produced by ``parse_page``.
        """
        print(url)
        res = requests.get(url=url, headers=self.headers, timeout=timeout)
        res.encoding = "utf-8"
        return self.parse_page(res.text, i, j)

    def parse_page(self, html, i, j):
        """Decode the JSON array embedded in ``html`` and extract its records.

        The first 35 and the last 20 characters of ``html`` are discarded as a
        wrapper around the array; if that slice also cut off the opening
        bracket, it is put back before decoding.

        Args:
            html: Raw response text.
            i: Printed for progress tracing only.
            j: Unused; kept so existing callers keep working.

        Returns:
            A list with one dict per record, holding the keys in ``_FIELDS``.

        Raises:
            ValueError: If the sliced text is not valid JSON
                (``json.JSONDecodeError`` is a subclass).
            KeyError: If a record lacks one of the expected keys.
        """
        print(i)
        results = html[35:-20]
        print(results)
        if not results.startswith("["):
            # The fixed-width slice removed the opening bracket; restore it.
            results = "[" + results
            print(results)
        print(len(results))
        # NOTE(review): the original had CSV persistence (meituan.csv)
        # commented out and threw the extracted values away; they are returned
        # here so a caller can store them.
        return [
            dict((key, item[key]) for key in self._FIELDS)
            for item in json.loads(results)
        ]

    def main(self):
        """Request the page eight times for each configured city id.

        A random 3-5 second pause precedes every request.
        """
        # City ids noted by the original author: 20 Guangzhou, 118 Hong Kong,
        # 165 Baishan, 170 Hegang.
        cityId_lists = [96]
        for i in cityId_lists:
            for j in range(1, 9):
                time.sleep(random.randint(3, 5))
                # NOTE(review): self.url contains no "{}" placeholders, so
                # format() returns it unchanged and every request hits the
                # same address.
                url = self.url.format(i, j)
                self.get_page(url, i, j)


if __name__ == '__main__':
    start = time.time()
    spider = LianjiaSpider()
    spider.main()
    end = time.time()
    # Prints the elapsed run time in seconds.
    print("執行時間:%.2f" % (end - start))
總結
- 上一篇: docker的可视化管理
- 下一篇: HDLBits(7)——Multiple