多线程下载sis001的网友自拍贴图版面的图片
生活随笔
收集整理的這篇文章主要介紹了
多线程下载sis001的网友自拍贴图版面的图片
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
多線程下載sis001的網友自拍貼圖版面的圖片,這個程序并不能下載全部版面而是只下載第二頁的內容。因為第二頁是最新的內容哦。你需要一個sis001,5級以上的用戶名和密碼否則無法訪問此版面。
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Download the .jpg images linked from one forum board page (Python 2).

The script logs in with the form fields in ``data``, fetches the board page
``BOARD_URL``, queues every thread link found on it and lets a fixed pool of
worker threads download the images of each thread into the current
directory.
"""
import urllib
import urllib2
import re
import cookielib
import Queue
import threading

BASE_URL = "http://38.103.161.185/forum/"
LOGIN_URL = BASE_URL + "logging.php?action=login&"
# Board pages look like forum-62-(page).html; this script reads page 2.
BOARD_URL = BASE_URL + "forum-62-2.html"
# Upper bound on concurrent downloads; the original spawned threads without
# any limit for as long as the queue was non-empty.
WORKER_COUNT = 10

# Thread links on the board page look like thread-(4917300)-1-2.html.
THREAD_RE = re.compile(r'<a href="(thread-\d{7}-1-2\.html)')
# Either an absolute http:// image or a forum-relative attachments/ path.
IMG_RE = re.compile(r'<img src="(http://.*?\.jpg|attachments/.*?\.jpg)"')

# NOTE(review): the User-agent value lacks its closing parenthesis; it is
# kept exactly as the original sent it.
headers = {"User-agent": "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1"}

cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

# NOTE(review): the two hash-named fields are presumably the username and
# password inputs of the login form - replace the "xxxx" placeholders and
# confirm against the live form.
data = {
    "formhash": "3fec4925",
    "referer": "index.php",
    "loginfield": "username",
    "240aa46b3893fb57c436c0a3785b61e7": "xxxx",
    "ea32b1cadbde4b66ca614e0bb593d1c9": "xxxx",
    "questionid": "0",
    "answer": "",
    "cookietime": "2592000",
    "loginmode": "",
    "styleid": "",
    "loginsubmit": "true",
}

# Unbounded: the queue is filled completely before any worker starts, so a
# size limit could block put() forever.
q = Queue.Queue()


def _save_image(img_url):
    """Fetch ``img_url`` and write it to the current directory.

    The file is named after the last path segment of the URL.  Failures are
    reported on stdout and otherwise ignored so that one bad image does not
    stop the remaining downloads.
    """
    filename = img_url.split("/")[-1]
    try:
        req = urllib2.Request(img_url, None, headers)
        res = opener.open(req).read()
        with open(filename, "wb") as out:
            out.write(res)
    except Exception as exc:
        print("failed %s: %s" % (img_url, exc))


def downPic(tiezi_url):
    """Download every .jpg image referenced by the thread page ``tiezi_url``.

    Absolute ``http://`` image links are fetched as they are; relative
    ``attachments/...`` links are resolved against ``BASE_URL`` first.

    Raises whatever ``opener.open`` raises when the thread page itself
    cannot be fetched.
    """
    req = urllib2.Request(tiezi_url, None, headers)
    tiezi_html = opener.open(req).read()
    # set(): the same image may be referenced more than once on a page.
    for link in set(IMG_RE.findall(tiezi_html)):
        if link.startswith("http"):
            print("%s downloading..." % link)
            _save_image(link)
        else:
            img_url = BASE_URL + link
            print("inner link %s" % img_url)
            _save_image(img_url)


def _worker():
    """Process queued thread URLs until the queue is empty."""
    while True:
        try:
            # Non-blocking: a blocking get() would hang forever once the
            # other workers have drained the queue.
            tiezi_url = q.get_nowait()
        except Queue.Empty:
            return
        try:
            downPic(tiezi_url)
        except Exception as exc:
            print("failed %s: %s" % (tiezi_url, exc))


def main():
    """Log in, queue the thread links of the board page and download them."""
    post_data = urllib.urlencode(data)
    req = urllib2.Request(LOGIN_URL, post_data, headers)
    # The response body is not needed; the session cookie lands in ``cj``.
    opener.open(req).close()

    req2 = urllib2.Request(BOARD_URL, None, headers)
    board_html = opener.open(req2).read()
    for name in THREAD_RE.findall(board_html):
        q.put(BASE_URL + name)
    print("total title:%s" % q.qsize())

    count = min(WORKER_COUNT, q.qsize())
    workers = [threading.Thread(target=_worker) for _ in range(count)]
    for th in workers:
        th.start()
    for th in workers:
        th.join()


if __name__ == "__main__":
    main()
# =====
高效
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Download the .jpg images of every thread on one forum board page (Python 2).

The script logs in with the form fields in ``data``, fetches the board page
``BOARD_URL`` and, for each thread linked from it, queues the thread's image
links and downloads them with a fixed pool of worker threads into
``IMG_DIR``.  Download failures are appended to ``log.txt``.
"""
import urllib
import urllib2
import re
import cookielib
import Queue
import threading
import socket
import time
import sys
import random
import os

BASE_URL = "http://38.103.161.185/forum/"
LOGIN_URL = BASE_URL + "logging.php?action=login&"
# Board pages look like forum-62-(page).html; this script reads page 1.
BOARD_URL = BASE_URL + "forum-62-1.html"
# Relative directory instead of the Windows-only '.\\img\\' prefix.
IMG_DIR = "img"
# Upper bound on concurrent downloads; the original spawned threads without
# any limit for as long as the queue was non-empty.
WORKER_COUNT = 10

# Thread links look like thread-(4917300)-1-(1).html.
THREAD_RE = re.compile(r'<a href="(thread-\d{7}-1-\d{1}\.html)')
# Either an absolute http:// image or a forum-relative attachments/ path.
IMG_RE = re.compile(r'<img src="(http://.*?\.jpg|attachments/.*?\.jpg)"')

# NOTE(review): the User-agent value lacks its closing parenthesis; it is
# kept exactly as the original sent it.
headers = {"User-agent": "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1"}
socket.setdefaulttimeout(30)
cj = cookielib.CookieJar()
# To go through a local proxy, add
# urllib2.ProxyHandler({'http': '127.0.0.1:8087'}) to build_opener().
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

# NOTE(review): the two hash-named fields are presumably the username and
# password inputs of the login form - replace the "xxx" placeholders and
# confirm against the live form.
data = {
    "formhash": "3fec4925",
    "referer": "index.php",
    "loginfield": "username",
    "240aa46b3893fb57c436c0a3785b61e7": "xxx",
    "ea32b1cadbde4b66ca614e0bb593d1c9": "xxx",
    "questionid": "0",
    "answer": "",
    "cookietime": "2592000",
    "loginmode": "",
    "styleid": "",
    "loginsubmit": "true",
}

# Default queue used by getPic() when it is called without an argument.
q = Queue.Queue(0)


def log(message):
    """Append ``message`` to log.txt, prefixed with the current time."""
    with open("log.txt", "a") as handle:
        handle.write(time.ctime() + " " + message + "\n")


def getPic(img_queue=None):
    """Take one image link off the queue and download it into ``IMG_DIR``.

    ``img_queue`` defaults to the module-level queue ``q``.  Absolute
    ``http`` links are fetched as they are; anything else is resolved
    against ``BASE_URL``.  The file name is the last path segment of the
    URL followed by a random number and ``.jpg`` so that equally named
    images do not overwrite each other.  Failures are written to the log.

    Returns True when a link was processed and False when the queue was
    empty.
    """
    if img_queue is None:
        img_queue = q
    try:
        # Non-blocking: a blocking get() would hang forever once the other
        # workers have drained the queue.
        link = img_queue.get_nowait()
    except Queue.Empty:
        return False

    img_url = link if link.startswith("http") else BASE_URL + link
    filename = img_url.split("/")[-1]
    try:
        req = urllib2.Request(img_url, None, headers)
        res = opener.open(req).read()
        suffix = str(int(random.random() * 100000000))
        savefile = os.path.join(IMG_DIR, filename + suffix + ".jpg")
        with open(savefile, "wb") as out:
            out.write(res)
    except Exception:
        etype, value = sys.exc_info()[:2]
        log(link + "||" + str(etype) + "||" + str(value))
    return True


def _drain(img_queue):
    """Download links from ``img_queue`` until it is empty."""
    while getPic(img_queue):
        pass


def downPic(tiezi_url, q):
    """Download all .jpg images referenced by the thread page ``tiezi_url``.

    The distinct image links found on the page are put on ``q`` and then
    downloaded by at most ``WORKER_COUNT`` threads.  The call returns only
    after those threads have finished.

    Raises whatever ``opener.open`` raises when the thread page itself
    cannot be fetched.
    """
    req = urllib2.Request(tiezi_url, None, headers)
    tiezi_html = opener.open(req).read()
    # set(): drop image links that occur more than once on the page.
    for link in set(IMG_RE.findall(tiezi_html)):
        q.put(link)

    # Created here, before any worker runs, so threads never race on it.
    if not os.path.isdir(IMG_DIR):
        os.makedirs(IMG_DIR)

    count = min(WORKER_COUNT, q.qsize())
    workers = [threading.Thread(target=_drain, args=(q,)) for _ in range(count)]
    for th in workers:
        th.start()
    for th in workers:
        th.join()


def main():
    """Log in, then download the images of every thread on the board page."""
    post_data = urllib.urlencode(data)
    req = urllib2.Request(LOGIN_URL, post_data, headers)
    # The response body is not needed; the session cookie lands in ``cj``.
    opener.open(req).close()

    req2 = urllib2.Request(BOARD_URL, None, headers)
    board_html = opener.open(req2).read()
    # set(): drop duplicate thread links.
    for name in set(THREAD_RE.findall(board_html)):
        tiezi_url = BASE_URL + name
        print(tiezi_url)
        try:
            downPic(tiezi_url, Queue.Queue(0))
        except Exception:
            # One unreachable thread page must not abort the whole run.
            etype, value = sys.exc_info()[:2]
            log(tiezi_url + "||" + str(etype) + "||" + str(value))
    print('All threads terminate!')


if __name__ == "__main__":
    main()
# Summary (總結)
以上是生活随笔為你收集整理的多线程下载sis001的网友自拍贴图版面的图片的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 非标自动化设计全过程,建议收藏
- 下一篇: 如何清理废弃pv和其对应的文件夹