python获取qq好友ip_qqzeng-ip.dat IP库读取python版
qqzeng-ip.dat是一個特殊格式的dat文件,可以快速地查找IP對應的地理位置信息。據作者的測試結果,查找100萬個IP約需0.5秒。
當然這和語言有非常大的關係,python的循環性能一直為人所詬病。目前python版本的測試結果是查找10萬個IP約需3.X秒,還算夠用,畢竟真實情況下30秒~5分鐘內的日志,不太可能出現一批數據中的不重復IP超過10萬個。
作者提供了解析dat的java/c/php腳本,但沒有提供python版本的。所以我就寫了一個,以供需要用python語言讀取ip的地理位置信息使用。
代碼如下:
#coding:utf-8
import os
import math
import socket
import struct
import io
from io import SEEK_SET
# Normalised absolute path of the database file, expected to sit in the
# same directory as this script.
path = os.path.normpath(
    "/".join((os.path.dirname(os.path.abspath(__file__)), "qqzeng-ip-utf8.dat"))
)
class IpSearch(object):
    """Look up the record stored for an IPv4 address in a qqzeng-ip ``.dat`` file.

    File layout as parsed by this class (every integer is little-endian):

    * bytes 0-15 -- four unsigned 32-bit file offsets: first index record,
      last index record, first prefix record, last prefix record;
    * index records, 12 bytes each -- start IP (4), end IP (4), file offset
      of the record text (3), length of the record text (1);
    * prefix records, 9 bytes each -- first octet of the address (1), then
      the numbers of the first and last index records for that octet (4 + 4).

    The code runs unchanged on Python 2 and Python 3: all parsing goes
    through ``struct`` (indexing ``bytes`` yields ``str`` on 2 but ``int`` on
    3) and all divisions are floor divisions.
    """

    fp = None
    firstStartIpOffset = None
    lastStartIpOffset = None
    preStartOffset = None
    preEndOffset = None
    ipCount = None
    prefixCount = None
    # Created per instance in __init__. A dict defined here would be shared
    # by every IpSearch object and mix the tables of different files.
    prefixList = None

    def __init__(self, dat_path=None):
        """Open the database and load its prefix table into memory.

        :param dat_path: database file to open; defaults to the module-level
            ``path`` when omitted, which is what the original no-argument
            constructor always used.
        :raises IOError: if the file cannot be opened.
        :raises struct.error: if the header or prefix table is truncated.
        """
        if dat_path is None:
            dat_path = path
        self.prefixList = {}
        self.fp = io.open(dat_path, "rb")
        try:
            header = self.fp.read(16)
            (self.firstStartIpOffset,
             self.lastStartIpOffset,
             self.preStartOffset,
             self.preEndOffset) = struct.unpack("<IIII", header)
            # Both offsets point at the *start* of the last record, hence +1.
            self.ipCount = (self.lastStartIpOffset - self.firstStartIpOffset) // 12 + 1
            self.prefixCount = (self.preEndOffset - self.preStartOffset) // 9 + 1

            self.fp.seek(self.preStartOffset, SEEK_SET)
            preBuff = self.fp.read(self.prefixCount * 9)
            for k in range(self.prefixCount):
                prefix, startIndex, endIndex = struct.unpack_from("<BII", preBuff, k * 9)
                self.prefixList[prefix] = {
                    "start_index": startIndex,
                    "end_index": endIndex,
                }
        except Exception:
            # Do not leave the handle open when the file turns out to be bad.
            self.close()
            raise

    def __del__(self):
        self.close()

    def close(self):
        """Close the underlying file; safe to call more than once."""
        if self.fp is not None:
            self.fp.close()
            self.fp = None

    def get(self, ip):
        """Return the raw record bytes stored for ``ip``.

        :param ip: dotted IPv4 address string, e.g. ``"210.51.200.123"``.
        :returns: the bytes read from the file for the matching range, or
            ``b""`` when ``ip`` is empty/``None`` or no range contains it.
            (``b""`` equals ``""`` on Python 2, so existing callers that
            ``.decode()`` the result keep working.)
        :raises ValueError: if the first dotted component is not an integer.
        :raises socket.error: if ``socket.inet_aton`` rejects the address.
        """
        if not ip:
            return b""
        prefix = int(ip.split(".")[0])
        ipnum = self.ip2unit(ip)

        index = self.prefixList.get(prefix)
        if index is None:
            return b""
        low = index["start_index"]
        high = index["end_index"]

        left = low if low == high else self.binarySearch(low, high, ipnum)
        _, startIp, endIp, localOffset, localLength = self.getIndex(left, 0, 0, 0, 0)
        if startIp <= ipnum <= endIp:
            return self.getLocal(localOffset, localLength)
        return b""

    def getLocal(self, localOffset, localLength):
        """Read ``localLength`` bytes starting at file offset ``localOffset``."""
        self.fp.seek(localOffset, SEEK_SET)
        return self.fp.read(localLength)

    def getIndex(self, left, startIp, endIp, localOffset, localLength):
        """Decode index record number ``left``.

        The last four parameters are accepted only for backward
        compatibility; their incoming values are ignored.

        :returns: ``[left, startIp, endIp, localOffset, localLength]``.
        """
        self.fp.seek(self.firstStartIpOffset + left * 12, SEEK_SET)
        buff = self.fp.read(12)
        startIp, endIp, packed = struct.unpack("<III", buff)
        # The final word packs a 24-bit offset (low bytes) and an 8-bit length.
        localOffset = packed & 0xFFFFFF
        localLength = packed >> 24
        return [left, startIp, endIp, localOffset, localLength]

    def binarySearch(self, low, high, k):
        """Return the lowest record number in ``[low, high]`` whose end IP is >= ``k``.

        Returns 0 when no record in the range qualifies; ``get`` then rejects
        the result through its own range check.
        """
        m = 0
        while low <= high:
            mid = (low + high) // 2
            if self.getEndIpNum(mid) >= k:
                m = mid
                if mid == 0:
                    break
                high = mid - 1
            else:
                low = mid + 1
        return m

    def getEndIpNum(self, left):
        """Return the end IP (as an unsigned integer) of index record ``left``."""
        self.fp.seek(self.firstStartIpOffset + left * 12 + 4, SEEK_SET)
        return struct.unpack("<I", self.fp.read(4))[0]

    def ip2unit(self, ip):
        """Return ``ip`` as an unsigned 32-bit integer."""
        # "!L" already unpacks as unsigned, so no sign correction is needed.
        return self.ip2long(ip)

    def ip2long(self, ip):
        """Convert a dotted IPv4 string to its integer value (network byte order)."""
        packedIP = socket.inet_aton(ip)
        return struct.unpack("!L", packedIP)[0]

    def bytesToLong(self, a, b, c, d):
        """Combine four bytes, least significant first, into an unsigned integer.

        Each argument may be an ``int`` (what indexing ``bytes`` yields on
        Python 3) or a one-character byte string (Python 2).
        """
        v0, v1, v2, v3 = [
            ord(x) if isinstance(x, (bytes, str)) else x for x in (a, b, c, d)
        ]
        return v0 | v1 << 8 | v2 << 16 | v3 << 24
if __name__ == '__main__':
    # Demo: look up one address, then time 100000 repeated lookups.
    import sys
    import time

    ipSearch = IpSearch()

    location = ipSearch.get("210.51.200.123").decode("utf-8")
    if sys.version_info[0] < 3:
        # Keep the original Python 2 output: GBK-encoded bytes written
        # straight to the console. Python 3 prints the text itself.
        location = location.encode("gbk")
    # Single-argument print(...) parses and behaves the same on Python 2 and 3.
    print(location)

    startTime = time.time()
    for i in range(0, 100000):
        ipSearch.get("210.51.200.123")
    endTime = time.time()
    print("time waste: %s" % (endTime - startTime))
測試結果如下:
與百度查出的IP信息進行對比:
還挺不錯的,對吧。
總結
以上是生活随笔為你收集整理的python获取qq好友ip_qqzeng-ip.dat IP库读取python版的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: mysql 当前用户连接数_实战:判断m
- 下一篇: python验证数学原理_一起学open