bs4爬取网页基础
import requests
from bs4 import BeautifulSoup


def getSOUP(url):
    """Fetch *url* and print basic BeautifulSoup facts about its first <a> tag.

    The page is downloaded with ``requests`` (30 second timeout), parsed with
    the built-in ``"html.parser"`` backend, and the first ``<a>`` element is
    inspected: the tag itself, its name, the names of its parent and
    grandparent, its type, its attribute dict, the ``class`` attribute and
    the tag's ``.string`` value are printed in that order.

    Args:
        url: Address of the HTML page to download.

    Returns:
        None. All results are written to standard output.

    Any download failure, or a page that lacks the expected ``<a>`` structure
    or ``class`` attribute, is reported by printing "錯誤" instead of raising.
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn HTTP 4xx/5xx responses into exceptions so they are reported too.
        r.raise_for_status()
    except requests.RequestException:
        # The original `except expression as identifier:` was an unfilled
        # snippet placeholder and raised NameError whenever a request failed.
        print("錯誤")
        return

    # "html.parser" selects the parser that ships with Python's standard
    # library; soup.prettify() would give an indented dump of the markup.
    soup = BeautifulSoup(r.text, "html.parser")
    tag = soup.a  # first <a> element in the document, or None if there is none

    try:
        print(tag)  # the tag and its content
        print(tag.name)  # the tag's name
        print(tag.parent.name)
        print(tag.parent.parent.name)
        print(type(tag))  # type of the tag object
        print(tag.attrs)  # attributes; always a dict, even when empty
        print(type(tag.attrs))  # type of the attribute container
        print(tag.attrs['class'])  # value of the class attribute
        print(type(tag.attrs['class']))
        print(tag.string)  # the tag's string
    except (AttributeError, KeyError):
        # AttributeError: no <a> tag (or no grandparent) -> attribute access on
        # None. KeyError: the <a> tag has no class attribute.
        print("錯誤")


if __name__ == "__main__":
    url = "https://python123.io/ws/demo.html"
    getSOUP(url)
?
總結
- 上一篇: 百度和360的关键词提交查询
- 下一篇: Python爬取京东商品