# Create the working directory if it does not exist yet.
# fix: added the missing ':' and replaced the '== False' comparison.
if not os.path.exists(rootdir):
    os.mkdir(rootdir)

# Run an external shell command (cmd is defined elsewhere).
os.system(cmd)

# Iterate the key/value pairs of the mapping named 'dict'.
# NOTE(review): 'dict' shadows the builtin; rename it where it is defined.
for key, value in dict.items():
    pass  # placeholder body — the original snippet had none
# Read 'xxxx.txt' line by line and print each line.
# fix: 'with' closes the handle even on error (the original never closed it
# on exceptions), and 'print line' was Python-2 syntax.
with open('xxxx.txt', encoding='utf-8') as fd:
    for line in fd:
        print(line)
# Append one line of text to 'xxxx.txt'.
# fix: 'with' guarantees the handle is flushed and closed.
with open('xxxx.txt', 'a+', encoding='utf-8') as fd:
    fd.write('aaaaa' + '\n')
# xlrd demo: reading (and in-memory editing of) an .xls workbook.
import xlrd

# open the workbook — note that 'open_workbook' starts lower-case
data = xlrd.open_workbook('demo.xls')
# names of the sheets contained in the file
data.sheet_names()
# get the first sheet: by position, by index, or by name
table = data.sheets()[0]
table = data.sheet_by_index(0)
table = data.sheet_by_name(u'Sheet1')
# number of rows and columns
nrows = table.nrows
ncols = table.ncols
# whole-row / whole-column values as lists
# fix: the original used an undefined index variable 'i' here
table.row_values(0)
table.col_values(0)
# loop over the rows by index (fix: 'print x' was Python-2 syntax)
for rownum in range(table.nrows):
    print(table.row_values(rownum))
# single cells
cell_A1 = table.cell(0, 0).value
cell_C4 = table.cell(2, 3).value
# the same cells via row / column indexing
cell_A1 = table.row(0)[0].value
cell_A2 = table.col(1)[0].value
# simple in-memory write
row = 0
col = 0
ctype = 1  # cell type: 0 empty, 1 string, 2 number, 3 date, 4 boolean, 5 error
value = 'lixiaoluo'
xf = 0  # extended formatting (default 0)
table.put_cell(row, col, ctype, value, xf)
table.cell(0, 0)        # text: u'lixiaoluo'
table.cell(0, 0).value  # 'lixiaoluo'
# xlwt demo: creating and styling a new .xls workbook.
import xlwt

# create a workbook — note the capital 'W' in Workbook
# (renamed from 'file', which shadowed the builtin)
workbook = xlwt.Workbook()
# add a sheet
table = workbook.add_sheet('sheet name')
# write data: table.write(row, column, value)
table.write(0, 0, 'test')
# writing the same cell twice raises:
#   Exception: Attempt to overwrite cell:
#   sheetname=u'sheet 1' rowx=0 colx=0
# pass cell_overwrite_ok=True when adding the sheet to allow it
table = workbook.add_sheet('sheet name', cell_overwrite_ok=True)
# save the file
workbook.save('demo.xls')
# using styles
style = xlwt.XFStyle()  # initialise a style
font = xlwt.Font()      # create a font for the style
font.name = 'Times New Roman'
font.bold = True
style.font = font       # attach the font to the style
table.write(0, 0, 'some bold Times text', style)  # write with the style
# Parse the command line: -h/--help, -i/--ip <addr>, -p/--port <port>.
try:
    options, args = getopt.getopt(sys.argv[1:], "hp:i:", ["help", "ip=", "port="])
except getopt.GetoptError:
    # unknown switch or missing argument: nothing sensible to do
    sys.exit()
for name, value in options:
    if name in ("-h", "--help"):
        usage()
    elif name in ("-i", "--ip"):
        print(value)
    elif name in ("-p", "--port"):
        print(value)
import requests

AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
# fix: the original HEADERS literal was never closed, which left the rest of
# the snippet syntactically invalid
HEADERS = {
    'User-Agent': AGENT,
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'X-Requested-With': 'XMLHttpRequest',
    'Accept': '*/*'
}

# one shared session so the login cookie is reused by downloadUrl
session = requests.session()


def login():
    """Log in with the hard-coded form data; return True on HTTP 200.

    fix: the original 'return' statements sat at module level (and a stray
    '}' followed them); wrapped in a function so the snippet is valid Python.
    """
    postdata = {
        'defaults': 'xxx',
        'fromLogin': 'xxx',
        'userName': 'xxx',
        'password': 'xxxx'
    }
    url = 'xxxxxxxx'
    login_info = session.post(url, headers=HEADERS, data=postdata, verify=False)
    if login_info.status_code == requests.codes.ok:
        print('login success')
        return True
    else:
        print('login err')
        return False


def downloadUrl(rootdir, url, orgid, page):
    """Download *url* and save it as <rootdir>/<orgid>_<page>.html.

    Returns 'ok' on success, 'err' when the response looks like an
    anti-scraping challenge page or is suspiciously short.
    """
    # fix: the original referenced global_config.HEADERS although HEADERS is
    # defined right above in this snippet
    html = session.get(url, headers=HEADERS, verify=False)
    # a '<script' payload means the server answered with a challenge page
    if html.text[1:7] == 'script':
        print(html.text)
        return "err"
    if len(html.text) < 60:
        return "err"
    # fix: 'with' closes the file even if the write fails
    with open(rootdir + "/" + str(orgid) + '_' + str(page) + ".html", "w",
              encoding='utf-8') as sample:
        sample.write(html.text)
    return 'ok'
def _scrapy_by_row(row):
    """Pull the organization id and family id out of one JSON row."""
    try:
        orgid = row['organization']['id']
        familyid = row['censusRegisterFamily']['id']
    except KeyError:  # fix: the bare 'except:' also swallowed typos etc.
        print('errrr')
        return


def scrapy_by_file(json_file_name):
    """Load a JSON file and process each entry of its 'rows' array."""
    # read the whole file (fix: the original never closed this handle)
    with open(json_file_name, encoding='utf-8') as f:
        text = f.read()
    # strip the UTF-8 BOM carried over from Windows-created files
    if text.startswith(u'\ufeff'):
        text = text.encode('utf8')[3:].decode('utf8')
    try:
        json_data = json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        print(json_file_name)
        return
    # fix: the per-row function used to be re-defined inside the loop on
    # every iteration; hoisted to module level as _scrapy_by_row
    for row in json_data['rows']:
        _scrapy_by_row(row)
#遍歷目錄(rootdir) 遍歷到的每個文件都執行dirFunc def waklThroughDir(rootdir, dirFunc): for parent, dirnames, filenames in os.walk(rootdir): for filename in filenames: print(filename) #獲取后綴為txt的文件 if(filename.split('.')[-1] == 'html'): dirFunc(os.path.join(parent, filename))
# -*- coding: utf-8 -*-
"""Download apartment-complex listing pages from www.0577home.net and parse
them into one '$'-separated text file (name$price$address per line)."""
import re
import requests
import time

# ---- regular expressions used for parsing the downloaded pages ----
# total number of matching records (used to derive the page count)
PAGE_NUM = '共找到 (.*?) 符合條件的記錄'
# estate name
NAME = 'texttext_title"><ahref(.*?)</a></div><divclass="texttext_moreinfo">'
# estate price
PRICE = 'class="hot_price">(.*?)</span>'
# estate address
ADDRESS = 'text_moreinfo">(.*?)</div><divclass="texttext_moreinfo"><span>'
# output directory for all generated files
ROOTDIR = 'F:\\test\\'

# Browser-like headers so the site does not reject the scraper.
HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
    'Host': 'www.0577home.net',
    'Upgrade-Insecure-Requests': '1'
}


def getHouseListByPageno(pageNo):
    """Fetch listing page *pageNo* and save it as houseList_pageNo<N>.txt."""
    session = requests.session()
    url = 'http://www.0577home.net/xiaoqu/list_0_0_0_0_0_0_0_' + str(pageNo) + '.html'
    houseList = session.get(url, headers=HEADERS, verify=False)
    # fix: 'with' closes the file even if the write fails
    with open(ROOTDIR + "houseList_pageNo" + str(pageNo) + ".txt", 'w',
              encoding='utf-8') as fh:
        fh.write(houseList.text)


def getPageNum():
    """Derive the number of pages (20 records per page) from page 1."""
    with open(ROOTDIR + 'houseList_pageNo1.txt', encoding='utf-8') as f:
        rawContent = f.read()
    pageNum = re.findall(PAGE_NUM, rawContent)
    # fix: '//' keeps the result an int (the original returned a float)
    return int(pageNum[0]) // 20 + 1


def parseHouseListToFile(srcFile, dstFile):
    """Parse one downloaded listing file and append '$'-separated rows to
    the already-open *dstFile* handle."""
    with open(srcFile, encoding='utf-8') as f:
        rawContent = f.read()
    # strip ALL whitespace first so the regexes can match across line breaks
    content = re.sub(r'\s+', '', rawContent)
    # keep only the text after the last '>' (drops the <a ...> attributes)
    names = [d[d.rfind('>') + 1:] for d in re.findall(NAME, content)]
    prices = re.findall(PRICE, content)
    address = [d[d.rfind('>') + 1:] for d in re.findall(ADDRESS, content)]
    for name, price, addr in zip(names, prices, address):
        dstFile.write(name + '$' + price + '$' + addr + '\n')


if __name__ == '__main__':
    # ---- download ----
    getHouseListByPageno(1)
    # the first page tells us how many pages exist in total
    pageNum = getPageNum()
    for i in range(2, int(pageNum) + 1):
        getHouseListByPageno(i)
    # ---- parse ----
    localtime = time.strftime('%Y%m%d', time.localtime(time.time()))
    with open(ROOTDIR + localtime + '_houseList.txt', 'a+', encoding='utf-8') as f:
        # fix: the original hard-coded range(1, 115) here; use the page count
        # that was just computed instead
        for k in range(1, int(pageNum) + 1):
            parseHouseListToFile(ROOTDIR + "houseList_pageNo" + str(k) + ".txt", f)
# -*- coding: utf-8 -*-
"""Download estate detail pages from www.0577home.net and merge them with the
previously parsed listing data into one '$'-separated file."""
import re
import requests
import time
import os

# ---- regular expressions used for parsing ----
# total number of matching records
PAGE_NUM = '共找到 (.*?) 符合條件的記錄'
# estate name
NAME = 'texttext_title"><ahref(.*?)</a></div><divclass="texttext_moreinfo">'
# estate price
PRICE = 'class="hot_price">(.*?)</span>'
# estate address
ADDRESS = 'text_moreinfo">(.*?)</div><divclass="texttext_moreinfo"><span>'
# estate id (used to build the detail-page URL)
ID = 'class="picdiv_left"><ahref="http://www.0577home.net/xiaoqu/(.*?).html'
# detail fields
LOCATION = '<div><a>所屬區域:</a><span>(.*?)</span></div>'
AREA = '<div><a>占地面積:</a><span>(.*?)</span></div>'
GREENINGRATE = '<div><a>綠化率:</a><span>(.*?)</span></div>'
LAYER = '<div><a>樓總數:</a><span>(.*?)</span></div>'
TYPE = '<div><a>物業類型:</a><span>(.*?)</span></div>'
PRIMARYSCHOOL = '<div><a>所屬小學:</a><span>(.*?)</span></div>'
BUILDINGAREA = '<div><a>總建筑面積:</a><span>(.*?)</span></div>'
PLOTRATIO = '<div><a>容積率:</a><span>(.*?)</span></div>'
# NOTE(review): constant name is a typo for DEVELOPER; kept for compatibility
DEVEPLOPER = '<div><a>開發商:</a><span>(.*?)</span></div>'
# output directory for all generated files
ROOTDIR = 'F:\\test\\'

# Browser-like headers so the site does not reject the scraper.
HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
    'Host': 'www.0577home.net',
    'Upgrade-Insecure-Requests': '1'
}


def getHouseListByPageno(pageNo):
    """Fetch listing page *pageNo* and save it as houseList_pageNo<N>.txt."""
    session = requests.session()
    url = 'http://www.0577home.net/xiaoqu/list_0_0_0_0_0_0_0_' + str(pageNo) + '.html'
    houseList = session.get(url, headers=HEADERS, verify=False)
    # fix: 'with' closes the file even if the write fails
    with open(ROOTDIR + "houseList_pageNo" + str(pageNo) + ".txt", 'w',
              encoding='utf-8') as fh:
        fh.write(houseList.text)


def getHouseInfoByPageno(pageNo, k):
    """Fetch detail page *pageNo* unless it is already on disk.

    *k* is only used for progress logging.
    """
    if os.path.exists(ROOTDIR + "houseInfo_pageNo" + str(pageNo) + ".html"):
        return
    print('downloading !, count %s, page %s' % (str(k), str(pageNo)))
    session = requests.session()
    url = 'http://www.0577home.net/xiaoqu/detail_' + str(pageNo) + '.html'
    houseList = session.get(url, headers=HEADERS, verify=False)
    with open(ROOTDIR + "houseInfo_pageNo" + str(pageNo) + ".html", 'w',
              encoding='utf-8') as fh:
        fh.write(houseList.text)


def getPageNum():
    """Derive the number of pages (20 records per page) from page 1."""
    with open(ROOTDIR + 'houseList_pageNo1.txt', encoding='utf-8') as f:
        rawContent = f.read()
    pageNum = re.findall(PAGE_NUM, rawContent)
    # fix: '//' keeps the result an int (the original returned a float)
    return int(pageNum[0]) // 20 + 1


def parseHouseInfo(srcFile):
    """Extract the detail fields of one estate page.

    Returns a 10-tuple of strings: (category1, category2, area,
    greeningrate, layer, type, primaryschool, buildingarea, plotratio,
    developer).
    """
    with open(srcFile, encoding='utf-8') as f:
        content = f.read()
    # location is "<district> <sub-district>" separated by a space
    location = re.findall(LOCATION, content)[0].split(' ')
    category1 = location[0]
    category2 = location[1]
    area = re.findall(AREA, content)[0]
    greeningrate = re.findall(GREENINGRATE, content)[0]
    layer = re.findall(LAYER, content)[0]
    # fix: local renamed from 'type', which shadowed the builtin
    estate_type = re.findall(TYPE, content)[0]
    primaryschool = re.findall(PRIMARYSCHOOL, content)[0]
    buildingarea = re.findall(BUILDINGAREA, content)[0]
    plotratio = re.findall(PLOTRATIO, content)[0]
    developer = re.findall(DEVEPLOPER, content)[0]
    return (category1, category2, area, greeningrate, layer, estate_type,
            primaryschool, buildingarea, plotratio, developer)


def parseHouseListToFile(srcFile, dstFile):
    """Parse one listing page and append name$price$address$id rows to the
    already-open *dstFile* handle."""
    with open(srcFile, encoding='utf-8') as f:
        rawContent = f.read()
    # strip ALL whitespace first so the regexes can match across line breaks
    content = re.sub(r'\s+', '', rawContent)
    # keep only the text after the last '>' (drops the <a ...> attributes)
    names = [d[d.rfind('>') + 1:] for d in re.findall(NAME, content)]
    prices = re.findall(PRICE, content)
    ids = re.findall(ID, content)
    address = [d[d.rfind('>') + 1:] for d in re.findall(ADDRESS, content)]
    for name, price, addr, estate_id in zip(names, prices, address, ids):
        dstFile.write(name + '$' + price + '$' + addr + '$' + estate_id + '\n')


if __name__ == '__main__':
    # The listing pages are assumed to be downloaded already (the original
    # left the download steps commented out); only parsing and the detail
    # downloads happen here.
    localtime = time.strftime('%Y%m%d', time.localtime(time.time()))
    # step 1: merge the 114 downloaded listing pages into one '$' file
    with open(ROOTDIR + localtime + '_houseList.txt', 'a+', encoding='utf-8') as f:
        for k in range(1, 115):
            parseHouseListToFile(ROOTDIR + "houseList_pageNo" + str(k) + ".txt", f)
    # step 2: for every listed estate, download and parse its detail page
    with open(ROOTDIR + localtime + '_houseList.txt', encoding='utf-8') as f, \
         open(ROOTDIR + localtime + '_houseInfo.txt', 'w', encoding='utf-8') as fd:
        # fix: enumerate replaces the hand-maintained 'k' counter
        for k, line in enumerate(f):
            data = line.strip('\n').split('$')
            idx = data[3]  # estate id parsed out of the listing row
            getHouseInfoByPageno(idx, k)
            houseInfo = parseHouseInfo(ROOTDIR + "houseInfo_pageNo" + str(idx) + ".html")
            print(str(k) + "$".join(data) + '$' + "$".join(houseInfo))
            fd.write("$".join(data) + '$' + "$".join(houseInfo) + '\n')
# Dump every row of job.csv to stdout.
with open('job.csv', 'r') as handle:
    for record in csv.reader(handle):
        print(record)
# Create/open a CSV file and make sure it starts with a header row.
def createCsv(file):
    """Return a csv.writer appending to *file*; when the file does not exist
    yet, the header row (the module-level *paramname*) is written first.

    NOTE(review): the underlying handle is intentionally left open so the
    returned writer stays usable — callers never see it to close it.
    The parameter name 'file' shadows a builtin but is kept for callers.
    """
    is_new = not os.path.exists(file)
    # fix: the 'file exists' branch used to open without encoding='utf-8',
    # inconsistent with the creation branch
    csvfile = open(file, 'a+', encoding='utf-8', newline='')
    writer = csv.writer(csvfile)
    if is_new:
        writer.writerow(paramname)
    return writer
import sys
import jpype

# The value to decrypt is taken from the command line.
cipher_text = sys.argv[1]
# Directory that contains the decryption jar.
jar_dir = '/home/dsadm/why/python'
# Boot the JVM with the jar directory on the extension class path.
jpype.startJVM(jpype.getDefaultJVMPath(), "-Djava.ext.dirs=%s" % jar_dir)
# Bind the Java decrypt class and call it.
DECRYPT = jpype.JClass('why.fmrt.decrypt.DECRYPT')
plain_text = DECRYPT.decrypt(cipher_text)
print(plain_text)
jpype.shutdownJVM()
from urllib.request import urlretrieve
from urllib.request import urlopen
from bs4 import BeautifulSoup
import subprocess
import requests
from PIL import Image
from PIL import ImageOps


def cleanImage(imagePath):
    """Binarise the captcha (threshold 143) and add a 20px white border so
    tesseract gets clean, padded input; overwrites the file in place."""
    image = Image.open(imagePath)
    image = image.point(lambda x: 0 if x < 143 else 255)
    borderImage = ImageOps.expand(image, border=20, fill='white')
    borderImage.save(imagePath)


html = urlopen("http://www.pythonscraping.com/humans-only")
bsObj = BeautifulSoup(html, "html.parser")

# Gather prepopulated form values
imageLocation = bsObj.find("img", {"title": "Image CAPTCHA"})["src"]
formBuildId = bsObj.find("input", {"name": "form_build_id"})["value"]
captchaSid = bsObj.find("input", {"name": "captcha_sid"})["value"]
captchaToken = bsObj.find("input", {"name": "captcha_token"})["value"]

captchaUrl = "http://pythonscraping.com" + imageLocation
urlretrieve(captchaUrl, "captcha.jpg")
cleanImage("captcha.jpg")

# OCR the cleaned image with the tesseract CLI; output lands in captcha.txt.
p = subprocess.Popen(["tesseract", "captcha.jpg", "captcha"],
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p.wait()

# fix: 'with' closes the handle (the original leaked it)
with open("captcha.txt", "r") as f:
    # Clean any whitespace characters
    captchaResponse = f.read().replace(" ", "").replace("\n", "")

print("Captcha solution attempt: " + captchaResponse)

# CAPTCHAs on this site are always 5 characters; anything else is a misread.
if len(captchaResponse) == 5:
    params = {"captcha_token": captchaToken, "captcha_sid": captchaSid,
              "form_id": "comment_node_page_form",
              "form_build_id": formBuildId,
              "captcha_response": captchaResponse,
              "name": "Ryan Mitchell",
              "subject": "I come to seek the Grail",
              "comment_body[und][0][value]": "...and I am definitely not a bot"}
    r = requests.post("http://www.pythonscraping.com/comment/reply/10",
                      data=params)
    # fix: explicit parser avoids bs4's "no parser specified" warning and
    # parser-dependent behavior
    responseObj = BeautifulSoup(r.text, "html.parser")
    if responseObj.find("div", {"class": "messages"}) is not None:
        print(responseObj.find("div", {"class": "messages"}).get_text())
else:
    print("There was a problem reading the CAPTCHA correctly!")
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
import PIL.Image as image
import time, re, random
import requests
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

# Browser headers used when downloading the captcha tiles.
agent = 'Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101 Firefox/33.0'
headers = {'User-Agent': agent}


def get_merge_image(filename, location_list):
    """Reassemble the scrambled 260x116 captcha from its 10px strips.

    The page scrambles the image via CSS offsets; *location_list* holds one
    {'x', 'y'} dict per strip. PIL reminders: crop((left, upper, right,
    lower)); paste(img, (x, y)).
    """
    im = image.open(filename)
    im_list_upper = []
    im_list_down = []
    for location in location_list:
        # strips with y == -58 belong to the upper half of the image
        if location['y'] == -58:
            im_list_upper.append(im.crop((abs(location['x']), 58,
                                          abs(location['x']) + 10, 166)))
        # strips with y == 0 belong to the lower half
        if location['y'] == 0:
            im_list_down.append(im.crop((abs(location['x']), 0,
                                         abs(location['x']) + 10, 58)))
    # fix: the original created this target image twice; once is enough
    new_im = image.new('RGB', (260, 116))
    x_offset = 0
    for strip in im_list_upper:
        new_im.paste(strip, (x_offset, 0))
        x_offset += strip.size[0]
    x_offset = 0
    for strip in im_list_down:
        new_im.paste(strip, (x_offset, 58))
        x_offset += strip.size[0]
    return new_im


def get_image(driver, div):
    """Download the scrambled captcha tiles under xpath *div* and rebuild
    the original image; returns a PIL image."""
    background_images = driver.find_elements_by_xpath(div)
    location_list = []
    imageurl = ''
    style_re = "background-image: url\\(\"(.*)\"\\); background-position: (.*)px (.*)px;"
    for background_image in background_images:
        # fix: parse the inline style ONCE per tile (the original ran the
        # same findall three times per element)
        match = re.findall(style_re, background_image.get_attribute('style'))[0]
        imageurl = match[0]
        location_list.append({'x': int(match[1]), 'y': int(match[2])})
    # the page serves webp; request the jpg variant instead
    imageurl = imageurl.replace("webp", "jpg")
    imageName = imageurl.split('/')[-1]
    session = requests.session()
    r = session.get(imageurl, headers=headers, verify=False)
    with open(imageName, 'wb') as f:
        f.write(r.content)
    return get_merge_image(imageName, location_list)


def is_similar(image1, image2, x, y):
    """True when pixel (x, y) differs by less than 50 on every RGB channel."""
    pixel1 = image1.getpixel((x, y))
    pixel2 = image2.getpixel((x, y))
    for i in range(0, 3):
        # a difference of 50+ on any channel means the pixel belongs to the notch
        if abs(pixel1[i] - pixel2[i]) >= 50:
            return False
    return True


def get_diff_location(image1, image2):
    """Scan both 260x116 images column by column and return the x position of
    the first clearly different pixel — the notch the slider must reach."""
    for i in range(0, 260):
        for j in range(0, 116):
            if not is_similar(image1, image2, i, j):
                return i


def get_track(length):
    """Random x-axis drag track summing to *length*: 1-3px random steps, with
    the final five pixels moved one at a time so the drag looks human."""
    # fix: renamed the local from 'list', which shadowed the builtin
    track = []
    step = random.randint(1, 3)
    while length - step >= 5:
        track.append(step)
        length = length - step
        step = random.randint(1, 3)
    for _ in range(length):
        track.append(1)
    return track


def main():
    """Crack the demo slider captcha: load the page, rebuild both captcha
    images, locate the notch, and drag the knob along a humanised track."""
    driver = webdriver.Firefox()
    driver.get("http://www.geetest.com/exp_embed")
    # wait for the slider widget elements to render
    WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']").is_displayed())
    WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_cut_bg gt_show']").is_displayed())
    WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_cut_fullbg gt_show']").is_displayed())
    # download and rebuild the notched and the full captcha image
    image1 = get_image(driver, "//div[@class='gt_cut_bg gt_show']/div")
    image2 = get_image(driver, "//div[@class='gt_cut_fullbg gt_show']/div")
    # x position of the notch
    loc = get_diff_location(image1, image2)
    # movement track along the x axis
    track_list = get_track(loc)
    # the draggable knob
    element = driver.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']")
    y = element.location['y']
    print("第一步,點擊元素")
    ActionChains(driver).click_and_hold(on_element=element).perform()
    time.sleep(0.15)
    print("第二步,拖動元素")
    track_string = ""
    for track in track_list:
        track_string = track_string + "{%d,%d}," % (track, y - 445)
        # xoffset=track+22: offsets are relative to the knob's top-left corner
        # while the track is relative to its centre — add half the knob size.
        # yoffset=y-445: browser-dependent; must come out as 22 (half height).
        ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=y - 445).perform()
        # random pauses so the drag does not look machine-timed
        time.sleep(random.randint(10, 50) / 100)
    print(track_string)
    # xoffset=21 steps back one pixel; done 5 times because the knob starts
    # 5px from the slider's left edge
    for _ in range(5):
        ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y - 445).perform()
        time.sleep(0.1)
    print("第三步,釋放鼠標")
    ActionChains(driver).release(on_element=element).perform()
    time.sleep(3)
    # driver.quit() intentionally omitted so the result stays visible (demo)


if __name__ == '__main__':
    # fix: the original guard read "pass main()" on one line
    main()
import os
import tornado.httpserver
import tornado.ioloop
import tornado.options
import tornado.web
from view import *
from tornado.options import define, options

define("port", default=8000, help="run on the given port", type=int)


class Application(tornado.web.Application):
    """Tornado application that serves the index page at '/'."""

    def __init__(self):
        routes = [
            (r"/", Indexhandler),
        ]
        config = {
            'template_path': os.path.join(os.path.dirname(__file__), 'templates'),
            'autoescape': None,
            'debug': False,
        }
        tornado.web.Application.__init__(self, routes, **config)


if __name__ == "__main__":
    tornado.options.parse_command_line()
    # xheaders=True honours X-Real-Ip/X-Forwarded-For behind a proxy
    server = tornado.httpserver.HTTPServer(Application(), xheaders=True)
    server.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()
#! /usr/bin/env python
# coding=utf-8
"""Run a shell command periodically using the stdlib sched module."""
import time, os, sched

# scheduler(timefunc, delayfunc): wall-clock time plus a blocking sleep.
schedule = sched.scheduler(time.time, time.sleep)


def perform_command(cmd, inc):
    """Run *cmd* and immediately re-schedule itself *inc* seconds later,
    which makes the execution periodic."""
    schedule.enter(inc, 0, perform_command, (cmd, inc))
    os.system(cmd)


def timming_exe(cmd, inc=60):
    """Start running *cmd* every *inc* seconds; blocks forever.

    NOTE(review): the name looks like a typo for 'timing_exe'; kept so
    existing callers keep working.
    """
    # enter(delay, priority, action, argument): first run is inc seconds away
    schedule.enter(inc, 0, perform_command, (cmd, inc))
    # run() blocks until the event queue is empty — which never happens here
    schedule.run()


if __name__ == '__main__':
    # fix: guarded so merely importing this module no longer starts the
    # endless once-a-day loop
    timming_exe("getMovieList.py", 60 * 60 * 24)
from urllib.request import urlopen
from urllib.parse import urlencode
from urllib.error import URLError
import json


class xBaiduMap:
    """Minimal client for the Baidu Maps geocoding web API."""

    def __init__(self, key='mgf2Gxr7EgnfPVQnpClZnsug'):
        self.host = 'http://api.map.baidu.com'
        self.path = '/geocoder?'
        # kept for backward compatibility with code that inspects it;
        # requests no longer mutate this dict (see geocoding)
        self.param = {'address': None, 'output': 'json', 'key': key,
                      'location': None, 'city': None}

    def getLocation(self, address, city=None):
        """Return (lat, lng) for *address*, or None when the address is
        ambiguous or the lookup failed."""
        rlt = self.geocoding('address', address, city)
        if rlt is not None:
            l = rlt['result']
            # a list result means Baidu found several candidate locations
            if isinstance(l, list):
                return None
            return l['location']['lat'], l['location']['lng']

    def getAddress(self, lat, lng):
        """Reverse-geocode: return 'city;district;street;street_number'."""
        rlt = self.geocoding('location', "{0},{1}".format(lat, lng))
        if rlt is not None:
            # addressComponent carries the structured parts of the address
            ld = rlt['result']['addressComponent']
            return (ld['city'] + ';' + ld['district'] + ';' +
                    ld['street'] + ";" + ld['street_number'])

    def geocoding(self, key, value, city=None):
        """Call the geocoder with *key*=*value*; return the decoded JSON
        dict on success, else None.

        fix: builds the query parameters fresh on every call instead of
        deleting keys from the shared self.param dict — the original
        corrupted its own state across alternating forward/reverse calls.
        Also replaced '== None' / '!= None' with identity checks.
        """
        params = {'output': self.param['output'],
                  'key': self.param['key'],
                  key: value}
        # 'city' only makes sense for forward (address) geocoding
        if key == 'address' and city is not None:
            params['city'] = city
        try:
            r = urlopen(self.host + self.path + urlencode(params)).read()
        except URLError:
            print("URLError")
            return None
        rlt = json.loads(r.decode('utf-8'))
        if rlt['status'] == 'OK':
            return rlt
        print("Decoding Failed")
        return None
import multiprocessing

# Fan out PROCESS_NUM worker processes, each receiving its numeric id.
# NOTE(review): PROCESS_NUM, worker and the jobs list are defined elsewhere;
# jobs keeps the Process handles so the caller can join() them later.
for process_id in range(PROCESS_NUM):
    p = multiprocessing.Process(target=worker, args=(process_id,))
    jobs.append(p)
    p.start()
def split_file(file_name, file_num):
    """Split *file_name* into *file_num* files named split_<i>.txt.

    Lines are distributed in contiguous runs of roughly count/file_num
    lines each. Does nothing when split_0.txt already exists (a previous
    run is assumed).
    """
    # output of a previous run already present
    if os.path.exists("split_0.txt"):
        return
    # first pass: count the lines (fix: the original leaked this handle)
    with open(file_name, encoding='utf-8') as src:
        count = sum(1 for _ in src)
    # float on purpose: int(idx / count_per_file) reproduces the original
    # distribution even when count is not divisible by file_num
    count_per_file = count / file_num
    # open every target exactly once (fix: the original re-opened a target
    # for every single line and never closed any of the handles)
    out_files = [open("split_" + str(i) + ".txt", 'w', encoding='utf-8')
                 for i in range(file_num)]
    try:
        with open(file_name, encoding='utf-8') as src:
            for idx, line in enumerate(src):
                out_files[int(idx / count_per_file)].write(line)
    finally:
        for out in out_files:
            out.close()
import ibm_db

# Connect to the FMRT DB2 database.
# fix: removed mojibake '?' characters from the argument list and corrected
# the misspelled DSN keyword 'PORTOCOL' -> 'PROTOCOL'.
con = ibm_db.connect("DATABASE=FMRT;HOSTNAME=XX.XX.XX.XX;PORT=60000;PROTOCOL=TCPIP;UID=db2inst1;PWD=db2inst1;", "", "")
# Build the SQL statement from the input file (getSql and inputfile are
# defined elsewhere).
sql = getSql(inputfile)
stmt = ibm_db.exec_immediate(con, sql)
result = ibm_db.fetch_both(stmt)
rowidx = 0
# fetch_both returns the next row as a dict, or False when exhausted.
while (result):
    # DO SOMETHING
    result = ibm_db.fetch_both(stmt)
ibm_db.close(con)
import jieba

# fix: jieba.cut returns a one-shot generator; the original iterated it and
# then joined the exhausted iterator, so the "Full Mode" line printed empty.
seg_list = list(jieba.cut("我來到北京清華大學", cut_all=True))
for line in seg_list:
    print(line)
print("Full Mode: " + "/ ".join(seg_list))  # full mode

seg_list = jieba.cut("我來到北京清華大學", cut_all=False)
print("Default Mode: " + "/ ".join(seg_list))  # accurate mode

seg_list = jieba.cut("他來到了網易杭研大廈")  # accurate mode is the default
print(", ".join(seg_list))

seg_list = jieba.cut_for_search("小明碩士畢業于中國科學院計算所,后在日本京都大學深造")  # search-engine mode
print(", ".join(seg_list))
import calendar
import sys


def isMonthEnd(datetime):
    """Return 1 if *datetime* (a 'YYYYMMDD' string) is the last day of its
    month, else 0.

    NOTE(review): the parameter name shadows the stdlib module name but is
    kept for compatibility with existing callers.
    """
    year = int(datetime[0:4])
    month = int(datetime[4:6])
    day = int(datetime[6:8])
    # monthrange -> (weekday of the 1st, number of days in the month)
    wday, monthrange = calendar.monthrange(year, month)
    if day == monthrange:
        return 1
    else:
        return 0


if __name__ == '__main__':
    # fix: guarded so importing the module no longer requires a CLI argument,
    # and the result is now printed instead of silently discarded
    print(isMonthEnd(sys.argv[1]))
# Join all lines and delete the \r\n pairs with an in-place sed loop, i.e.
# strip Windows carriage returns, writing the result to newfile.
# NOTE(review): cmd is built by plain concatenation and run through a shell;
# oldfile/newfile must not contain spaces or shell metacharacters.
cmd = "sed ':a;N;$ s/\\r\\n//g;ba' " + oldfile + " > " + newfile
os.system(cmd)
# -*- coding: utf-8 -*-
"""
thread
~~~~~~~~~~~~~~~~
Thread framework
:copyright: (c) 2016 by why.
:license: MIT, see LICENSE for more details.
"""
import threading


class Threadconfig():
    """Build and run a fixed-size batch of daemon threads."""

    def __init__(self, thread_size):
        # number of threads expected to be built before run()
        self.thread_size = thread_size

    def topen(self):
        """Reset the list of pending thread tasks."""
        self.thread_tasks = []

    def build(self, func, **kwargs):
        """Queue *func* to be run in its own thread with *kwargs*."""
        self.thread_task = threading.Thread(target=func, kwargs=kwargs)
        self.thread_tasks.append(self.thread_task)

    def run(self):
        """Start all queued threads and block until every one finishes."""
        for thread_task in self.thread_tasks:
            # fix: 'daemon = True' replaces the deprecated setDaemon()
            thread_task.daemon = True
            thread_task.start()
        # fix: join() replaces the original busy-wait loop, which burned a
        # full CPU core and used isAlive() — removed in Python 3.9
        for thread_task in self.thread_tasks:
            thread_task.join()

    def __del__(self):
        # drop references to finished threads
        self.thread_tasks = []
pip install *.whl
轉載于:https://www.cnblogs.com/kernel521/p/6855359.html
總結
以上是生活随笔 為你收集整理的python的一些常用操作 的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔 網站內容還不錯,歡迎將生活随笔 推薦給好友。