爬虫——————爬取中金所,深交所,上交所期权数据
生活随笔
收集整理的這篇文章主要介紹了
爬虫——————爬取中金所,深交所,上交所期权数据
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
先從深交所開始:直接上傳源碼:
from io import BytesIO

import akshare as ak
import pandas as pd
import requests

# --- SZSE (Shenzhen Stock Exchange) option risk-indicator scraper ---
# Pull the trading calendar and keep sessions from 2019-01-01 onward.
date = ak.tool_trade_date_hist_sina()
date = date.loc[date['trade_date'] >= '2019-01-01']

frames = []
for j in date['trade_date']:
    print(j)
    url = ('http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx'
           '&CATALOGID=option_hyfxzb&TABKEY=tab1'
           '&txtSearchDate=%(j)s&random=0.5379373345285146' % {'j': j})
    print(url)
    response = requests.get(url)
    # The response body is a binary .xlsx workbook (a zip archive), not
    # text — that is why printing it looked like garbage. Decode it in
    # memory with BytesIO instead of the original round-trip of appending
    # raw bytes to a file in 'ab' mode (which corrupts the workbook) and
    # re-reading it from disk.
    df = pd.read_excel(BytesIO(response.content))
    df['trade_date'] = j
    frames.append(df)

# DataFrame.append was removed from pandas; concatenate once at the end
# instead of growing the frame (and rewriting the CSV) every iteration.
df1 = pd.concat(frames, ignore_index=True)
df1.to_csv('szse.csv')

# Next: scrape the SSE (Shanghai Stock Exchange).
import csv
import datetime
from contextlib import closing

import akshare as ak
import pandas as pd
import requests

# --- SSE (Shanghai Stock Exchange) option risk-indicator scraper ---
# Endpoint shape: //query.sse.com.cn/derivative/downloadRisk.do?trade_date=YYYYMMDD&productType=0
date = ak.tool_trade_date_hist_sina()
date = date.loc[date['trade_date'] >= '2019-01-01']

rows = []
for j in date['trade_date']:
    # The endpoint wants the compact YYYYMMDD form of the trade date.
    ymd = datetime.datetime.strptime(j, '%Y-%m-%d').strftime('%Y%m%d')
    url = ('http://query.sse.com.cn/derivative/downloadRisk.do'
           '?trade_date=%s&productType=0' % ymd)
    # Stream the CSV download and close the connection deterministically.
    with closing(requests.get(url, stream=True)) as r:
        lines = (line.decode('gbk') for line in r.iter_lines())
        # quotechar must be '"' — the original passed ',', the same
        # character as the delimiter, which defeats quoting entirely.
        reader = csv.reader(lines, delimiter=',', quotechar='"')
        for row in reader:
            print(row)
            rows.append(row)

# Build the frame once from the collected rows; per-row
# DataFrame.append is removed from pandas and was quadratic anyway.
df1 = pd.DataFrame(rows)
df1.to_csv('sse.csv')

# Next: scrape the CFFEX (China Financial Futures Exchange).
import datetime

import akshare as ak
import pandas as pd
import requests
from lxml import etree

# --- CFFEX (China Financial Futures Exchange) daily option scraper ---

# Child tags extracted from each <dailydata> node of the daily XML feed.
FIELDS = [
    'instrumentid', 'tradingday', 'openprice', 'highestprice',
    'lowestprice', 'closeprice', 'preopeninterest', 'openinterest',
    'presettlementprice', 'settlementpriceif', 'settlementprice',
    'volume', 'turnover', 'productid', 'delta', 'expiredate',
]


def _field(tree, idx, tag):
    """Return the text of ``//dailydata[idx]/tag`` or 0 when absent.

    Replaces 16 copy-pasted try/except blocks; only a missing node
    (IndexError on the empty xpath result) maps to the 0 default.
    """
    try:
        return tree.xpath('//dailydata[{}]/{}/text()'.format(idx, tag))[0]
    except IndexError:
        return 0


date = ak.tool_trade_date_hist_sina()
date = date.loc[date['trade_date'] >= '2019-01-01']

frames = []
for j in date['trade_date']:
    d = datetime.datetime.strptime(j, '%Y-%m-%d')
    url = ('http://www.cffex.com.cn/sj/hqsj/rtj/%(YM)s/%(D)s/index.xml?id=39'
           % {'YM': d.strftime('%Y%m'), 'D': d.strftime('%d')})
    response = requests.get(url)
    tree = etree.HTML(response.content)
    # XPath positions are 1-based: iterate 1..count inclusive. The
    # original range(1, count) silently dropped the last contract, and
    # its expiredate lookup used the literal string 'i' in the XPath
    # instead of the loop index — both fixed here.
    count = len(tree.xpath('//dailydata'))
    records = [{tag: _field(tree, i, tag) for tag in FIELDS}
               for i in range(1, count + 1)]
    frames.append(pd.DataFrame(records))

df1 = pd.concat(frames, ignore_index=True)
df1.to_csv('cffex.csv')

# The three scripts above fetch option data from CFFEX, SZSE and SSE;
# they can be used as-is or adapted to write into a database instead.
總結(jié)
以上是生活随笔為你收集整理的爬虫——————爬取中金所,深交所,上交所期权数据的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: JAVA中protected的作用
- 下一篇: IDEA常用快捷键【win-mac对比】