day_02
一、requests請求庫爬取豆瓣電影信息
一請求url
https://movie.douban.com/top250
一
請求方式
GET
請求頭
# user-agent, cookies  (the request-header fields named by the notes above)
import re

# Headers sent when the caller supplies none.  The notes list user-agent as
# a request header; a browser-like value is sent instead of the library
# default.  NOTE(review): confirm against the live site whether cookies are
# also required.
DEFAULT_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
    ),
}

# Every movie entry on the listing page starts with this tag.
_ITEM_MARKER = '<div class="item">'

# Fields that are extracted from a single entry.  Both the simplified and the
# traditional spelling of "director" are accepted, and the "主演:" (cast) part
# is optional so an entry without it cannot swallow the following entry.
_FIELDS_RE = re.compile(
    r'<em class="">(.*?)</em>'
    r'.*?<a href="(.*?)">'
    r'.*?<span class="title">(.*?)</span>'
    r'.*?(?:导演|導演):(.*?)(?:主演:(.*?))?<br>(.*?)</p>'
    r'.*?<span class="rating_num".*?>(.*?)</span>'
    r'.*?<span>(.*?)</span>',
    re.S,
)

# The one-line quote is looked up separately because not every entry has one.
_QUOTE_RE = re.compile(r'<span class="inq">(.*?)</span>', re.S)


def get_page(url, headers=None, timeout=10):
    """Fetch *url* with a GET request and return the response object.

    Args:
        url: Address of the page to download.
        headers: Optional request headers; ``DEFAULT_HEADERS`` when omitted.
        timeout: Seconds to wait before giving up on the server.

    Returns:
        The ``requests.Response`` for the page.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Imported here so the parsing/saving helpers stay usable (and testable)
    # on a machine where the HTTP client is not installed.
    import requests

    response = requests.get(
        url,
        headers=DEFAULT_HEADERS if headers is None else headers,
        timeout=timeout,
    )
    # Fail loudly instead of silently parsing an error page into zero movies.
    response.raise_for_status()
    return response


def parse_index(html):
    """Extract the movie entries from one listing page.

    Args:
        html: Source text of a Top250 listing page.

    Returns:
        A list of 9-tuples of strings:
        ``(rank, url, title, director, cast, year_and_genre, rating,
        rating_count, quote)``.  ``cast`` and ``quote`` are empty strings
        when the entry does not contain them.
    """
    movie_list = []
    # Parse entry by entry so a missing field can never make the pattern run
    # on into the next movie's markup.
    for chunk in html.split(_ITEM_MARKER)[1:]:
        fields = _FIELDS_RE.search(chunk)
        if fields is None:
            continue
        quote = _QUOTE_RE.search(chunk)
        desc = quote.group(1) if quote else ''
        movie_list.append(fields.groups(default='') + (desc,))
    return movie_list


def save_data(movie, path='douban_top250.txt'):
    """Print one movie record and append it to a UTF-8 text file.

    Args:
        movie: A 9-tuple as produced by ``parse_index``.
        path: File the record is appended to.
    """
    top, m_url, name, daoyan, actor, year_type, point, commit, desc = movie
    # The captured text is surrounded by the page's indentation, not only by
    # newlines, so strip all surrounding whitespace.
    year_type = year_type.strip()

    data = (
        '===================\n'
        f'電影排名:{top}\n'
        f'電影url:{m_url}\n'
        f'電影名稱:{name}\n'
        f'電影導演:{daoyan}\n'
        f'電影主演:{actor}\n'
        f'年份類型:{year_type}\n'
        f'電影評分:{point}\n'
        f'電影評論:{commit}\n'
        f'電影簡介:{desc}\n'
        '==================\n\n'
    )
    print(data)

    with open(path, 'a', encoding='utf-8') as f:
        f.write(data)
    print(f'電影:{name}寫入成功...')


if __name__ == '__main__':
    # Ten pages of 25 entries each: start=0, 25, ..., 225.
    for start in range(0, 250, 25):
        url = f'https://movie.douban.com/top250?start={start}&filter='
        print(url)

        index_res = get_page(url)
        for movie in parse_index(index_res.text):
            save_data(movie)
一請求url
https://movie.douban.com/top250
一
請求方式
GET
請求頭
# user-agent, cookies  (the request-header fields named by the notes above)
import re

# Headers sent when the caller supplies none.  The notes list user-agent as
# a request header; a browser-like value is sent instead of the library
# default.  NOTE(review): confirm against the live site whether cookies are
# also required.
DEFAULT_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
    ),
}

# Every movie entry on the listing page starts with this tag.
_ITEM_MARKER = '<div class="item">'

# Fields that are extracted from a single entry.  Both the simplified and the
# traditional spelling of "director" are accepted, and the "主演:" (cast) part
# is optional so an entry without it cannot swallow the following entry.
_FIELDS_RE = re.compile(
    r'<em class="">(.*?)</em>'
    r'.*?<a href="(.*?)">'
    r'.*?<span class="title">(.*?)</span>'
    r'.*?(?:导演|導演):(.*?)(?:主演:(.*?))?<br>(.*?)</p>'
    r'.*?<span class="rating_num".*?>(.*?)</span>'
    r'.*?<span>(.*?)</span>',
    re.S,
)

# The one-line quote is looked up separately because not every entry has one.
_QUOTE_RE = re.compile(r'<span class="inq">(.*?)</span>', re.S)


def get_page(url, headers=None, timeout=10):
    """Fetch *url* with a GET request and return the response object.

    Args:
        url: Address of the page to download.
        headers: Optional request headers; ``DEFAULT_HEADERS`` when omitted.
        timeout: Seconds to wait before giving up on the server.

    Returns:
        The ``requests.Response`` for the page.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Imported here so the parsing/saving helpers stay usable (and testable)
    # on a machine where the HTTP client is not installed.
    import requests

    response = requests.get(
        url,
        headers=DEFAULT_HEADERS if headers is None else headers,
        timeout=timeout,
    )
    # Fail loudly instead of silently parsing an error page into zero movies.
    response.raise_for_status()
    return response


def parse_index(html):
    """Extract the movie entries from one listing page.

    Args:
        html: Source text of a Top250 listing page.

    Returns:
        A list of 9-tuples of strings:
        ``(rank, url, title, director, cast, year_and_genre, rating,
        rating_count, quote)``.  ``cast`` and ``quote`` are empty strings
        when the entry does not contain them.
    """
    movie_list = []
    # Parse entry by entry so a missing field can never make the pattern run
    # on into the next movie's markup.
    for chunk in html.split(_ITEM_MARKER)[1:]:
        fields = _FIELDS_RE.search(chunk)
        if fields is None:
            continue
        quote = _QUOTE_RE.search(chunk)
        desc = quote.group(1) if quote else ''
        movie_list.append(fields.groups(default='') + (desc,))
    return movie_list


def save_data(movie, path='douban_top250.txt'):
    """Print one movie record and append it to a UTF-8 text file.

    Args:
        movie: A 9-tuple as produced by ``parse_index``.
        path: File the record is appended to.
    """
    top, m_url, name, daoyan, actor, year_type, point, commit, desc = movie
    # The captured text is surrounded by the page's indentation, not only by
    # newlines, so strip all surrounding whitespace.
    year_type = year_type.strip()

    data = (
        '===================\n'
        f'電影排名:{top}\n'
        f'電影url:{m_url}\n'
        f'電影名稱:{name}\n'
        f'電影導演:{daoyan}\n'
        f'電影主演:{actor}\n'
        f'年份類型:{year_type}\n'
        f'電影評分:{point}\n'
        f'電影評論:{commit}\n'
        f'電影簡介:{desc}\n'
        '==================\n\n'
    )
    print(data)

    with open(path, 'a', encoding='utf-8') as f:
        f.write(data)
    print(f'電影:{name}寫入成功...')


if __name__ == '__main__':
    # Ten pages of 25 entries each: start=0, 25, ..., 225.
    for start in range(0, 250, 25):
        url = f'https://movie.douban.com/top250?start={start}&filter='
        print(url)

        index_res = get_page(url)
        for movie in parse_index(index_res.text):
            save_data(movie)
二、selenium請求庫
1.京東搜索
# Open jd.com in Chrome, type a keyword into the search box and submit it.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# NOTE(review): passing the chromedriver path positionally is the older
# Selenium calling convention — confirm it matches the installed version.
driver = webdriver.Chrome(r'C:\Users\Administrator\Desktop\chromedriver.exe')

try:
    driver.get('https://www.jd.com/')

    # Explicit wait: block for up to 10 seconds until the search box
    # (id="key") is present in the DOM.
    wait = WebDriverWait(driver, 10)
    input_tag = wait.until(EC.presence_of_element_located((By.ID, 'key')))
    time.sleep(5)

    # Type the keyword, then press Enter to run the search.
    input_tag.send_keys('公仔')
    input_tag.send_keys(Keys.ENTER)

    # Keep the result page open long enough to look at it.
    time.sleep(20)

finally:
    # quit() ends the whole WebDriver session (all windows plus the driver
    # process); close() would only close the current window.
    driver.quit()

# 2. 百度登錄 (Baidu login)
# Log in to baidu.com with Selenium, demonstrating several locator strategies.
import time

from selenium import webdriver  # browser driver
from selenium.webdriver.common.by import By  # locator strategies
from selenium.webdriver.common.keys import Keys  # keyboard key constants

# NOTE(review): passing the chromedriver path positionally is the older
# Selenium calling convention — confirm it matches the installed version.
driver = webdriver.Chrome(r'C:\Users\Administrator\Desktop\chromedriver.exe')

try:
    # Implicit wait: set before get(); element lookups will retry for up to
    # 10 seconds before failing.
    driver.implicitly_wait(10)

    driver.get('https://www.baidu.com/')

    # Fixed pause after loading the page (a plain sleep, not an explicit
    # WebDriverWait).
    time.sleep(5)

    # Locator overview: find_element returns a single matching tag,
    # find_elements returns every matching tag.

    # --- automatic Baidu login ---
    # 1. By.LINK_TEXT: locate a link by its visible text.
    login_link = driver.find_element(By.LINK_TEXT, '登錄')
    login_link.click()  # open the login dialog
    time.sleep(1)

    # 2. By.ID: locate by the id attribute.
    user_login = driver.find_element(By.ID, 'TANGRAM__PSP_10__footerULoginBtn')
    user_login.click()
    time.sleep(1)

    # 3. By.CLASS_NAME: locate by CSS class.
    user = driver.find_element(By.CLASS_NAME, 'pass-text-input-userName')
    user.send_keys('*******')

    # 4. By.NAME: locate by the name attribute.
    pwd = driver.find_element(By.NAME, 'password')
    pwd.send_keys('*******')

    submit = driver.find_element(By.ID, 'TANGRAM__PSP_10__submit')
    submit.click()

    # 5. By.TAG_NAME: collect every <div> on the page.
    div = driver.find_elements(By.TAG_NAME, 'div')
    print(div)

    time.sleep(20)

finally:
    # Shut the browser down and release operating-system resources.  quit()
    # ends the whole session; close() would only close the current window.
    driver.quit()
轉載于:https://www.cnblogs.com/ZHKsuika/p/11119603.html
與50位技術專家面對面20年技術見證,附贈技術全景圖總結
- 上一篇: 女生初次适合开什么店 推荐市场最需要
- 下一篇: 2019年旗舰手机价格上涨怎么回事 5G