python扇贝单词书_Python脚本 扇贝单词书爬取
這是一個用於爬取扇貝單詞書的腳本
運行後將在當前工作目錄(通常即.py文件所在目錄)得到一個名為out.txt的輸出文件
主要使用了selenium庫(webdriver)
使用方式:
更改
13行中指向webdriver驅動器 代碼中使用了firefox提供的驅動器
Path = r'C:\Users\pc\Downloads\geckodriver-v0.19.1-win64\geckodriver.exe'
15行中的單詞書網頁根目錄
rootdir="https://www.shanbay.com/wordbook/6403/"
運行,并且贊美太陽
source code:
# coding=utf-8
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import unittest, time, re
import sys
# Python 2 only: reload(sys) re-exposes sys.setdefaultencoding so that the
# implicit str()/unicode conversions used when collecting page source default
# to UTF-8.  Neither name exists on Python 3 (text is already Unicode there),
# so the hack is skipped instead of raising NameError.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')

# Filesystem path of the geckodriver executable handed to webdriver.Firefox.
Path = r'C:\Users\pc\Downloads\geckodriver-v0.19.1-win64\geckodriver.exe'

# Output file for the scraped text; opened relative to the current working
# directory and truncated on every run.
f = open("out.txt", "w")

# Root URL of the word book to scrape (straight quotes: the typographic quotes
# in the published listing are a syntax error).
rootdir = "https://www.shanbay.com/wordbook/6403/"
class ShanbeiWord(unittest.TestCase):
    """Scrape a Shanbay word book with Selenium, packaged as a unittest case.

    ``setUp`` starts a Firefox session through the geckodriver at ``Path``,
    ``test_shanbei_word`` walks the word book at ``rootdir`` and writes the
    extracted text to the module-level file ``f``, and ``tearDown`` closes the
    browser.  The alert/element helpers are kept for compatibility; nothing in
    this class calls them.
    """

    # Number of unit links opened from the root page (original loop: i = 1..11).
    UNIT_COUNT = 11
    # Number of clicks on the ">" link per unit (original loop: j = 1..9); the
    # page source is captured before every click and once more after the last.
    PAGE_TURNS = 9
    # Absolute XPath of the n-th unit link on the root page; "{}" receives the
    # 1-based unit index.  NOTE(review): tied to the site's page layout at the
    # time of writing - confirm it still matches.
    UNIT_LINK_XPATH = (
        "/html/body/div[3]/div/div[1]/div/div[4]/div[7]/div[{}]"
        "/div[1]/table/tbody/tr/td[1]/a"
    )

    def setUp(self):
        """Start Firefox and initialise the per-test bookkeeping attributes."""
        # NOTE(review): ``executable_path`` is the Selenium 3 way to locate
        # geckodriver; newer Selenium releases expect a Service object instead.
        self.driver = webdriver.Firefox(executable_path=Path)
        # Let element lookups poll for up to 30 seconds before failing.
        self.driver.implicitly_wait(30)
        self.verificationErrors = []
        self.accept_next_alert = True

    def test_shanbei_word(self):
        """Collect the page source of every unit page and write the extract.

        For each unit: load ``rootdir``, click the unit link, then repeatedly
        record the page source and click the ">" link.  The concatenated
        source is reduced with two regular expressions and written to ``f``.
        """
        driver = self.driver
        # The original accumulator started as a single space; keep it so the
        # text fed to the regular expressions is unchanged.
        pages = [" "]
        for unit in range(1, self.UNIT_COUNT + 1):
            # The unit links live on the root page, so reload it every time.
            driver.get(rootdir)
            driver.find_element(
                By.XPATH, self.UNIT_LINK_XPATH.format(unit)
            ).click()
            for turn in range(1, self.PAGE_TURNS + 1):
                pages.append(str(driver.page_source))
                driver.find_element(By.LINK_TEXT, ">").click()
                # Progress marker; the counters are printed one past the
                # unit/page just captured, as the original did.
                print(str(unit + 1) + "+++" + str(turn + 1))
            # Capture the page reached by the final ">" click.
            pages.append(str(driver.page_source))

        s = "".join(pages)
        # Two-stage extraction kept exactly as published: first the remainder
        # of each line from a "g>" onwards, then every ">...<" fragment of the
        # textual form of that list.  The written output is the repr of the
        # final list.
        s = str(re.findall(r'g>.*', s, flags=0))
        s = str(re.findall(r'>.*?<', s, flags=0))
        f.write(s)
        # ``f`` is a module-level handle that is never explicitly closed, so
        # push the data out now.
        f.flush()

    def is_element_present(self, how, what):
        """Return True if an element located by (``how``, ``what``) exists."""
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        return True

    def is_alert_present(self):
        """Return True if switching to an alert succeeds, False otherwise."""
        try:
            self.driver.switch_to.alert
        except NoAlertPresentException:
            return False
        return True

    def close_alert_and_get_its_text(self):
        """Accept or dismiss the current alert and return its text.

        The alert is accepted when ``self.accept_next_alert`` is true and
        dismissed otherwise; the flag is reset to True afterwards, even if an
        exception is raised.
        """
        try:
            alert = self.driver.switch_to.alert
            alert_text = alert.text
            if self.accept_next_alert:
                alert.accept()
            else:
                alert.dismiss()
            return alert_text
        finally:
            self.accept_next_alert = True

    def tearDown(self):
        """Close the browser and fail if any verification errors were stored."""
        self.driver.quit()
        self.assertEqual([], self.verificationErrors)
# Run the scrape through the unittest runner when executed as a script;
# importing the module does not start the browser.
if __name__ == "__main__":
    unittest.main()
贊過:
贊 正在加載……
相關
總結
以上是生活随笔為你收集整理的python扇贝单词书_Python脚本 扇贝单词书爬取的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 三星S23全系渲染图曝光 这外观设计看上
- 下一篇: 快手怎么退出青少年模式