基于Jsoup爬虫Demo
生活随笔
收集整理的這篇文章主要介紹了
基于Jsoup爬虫Demo
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
今天寫了一個爬蟲跟大家分享一下。該爬蟲為簡單爬蟲,後續會跟大家分享難一些的爬蟲。話不多說,直接上代碼。如果有疑問,可以直接評論。
// ---- com/analysis/SnatchSHUJUJU.java ----
package com.analysis;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.dao.FriendLinkDao;

/**
 * Simple Jsoup crawler: collects the "more" links from the navigation page of
 * www.shujuju.cn, extracts every link listed on each of those pages and stores
 * them through {@link FriendLinkDao}.
 */
public class SnatchSHUJUJU {

    /**
     * Fetches and parses the page at {@code url}.
     *
     * @param url address of the page to download
     * @return the parsed document, or {@code null} when the request fails with an
     *         {@link IOException} (the stack trace is printed to stderr)
     */
    public static Document getDocument(String url) {
        try {
            // 5000 is the connection timeout, in milliseconds.
            return Jsoup.connect(url).timeout(5000).get();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Collects the absolute URLs of all anchors found inside elements whose class
     * attribute is exactly {@code "more fr"} on the navigation page.
     *
     * @return the collected URLs; empty when the navigation page cannot be fetched
     */
    public static List<String> getEveryOtherUrl() {
        List<String> urlList = new ArrayList<>();
        String host = "http://www.shujuju.cn";
        String url = "http://www.shujuju.cn/navigation/navigationPage";

        Document document = getDocument(url);
        if (document == null) {
            // getDocument already reported the failure; nothing to crawl.
            return urlList;
        }

        Elements moreBlocks = document.select("[class=more fr]");
        Elements anchors = moreBlocks.select("a[href]");
        for (Element anchor : anchors) {
            // The href values are prefixed with the host to build the full URL.
            urlList.add(host + anchor.attr("href"));
        }
        return urlList;
    }

    /**
     * Visits every URL in {@code list} and extracts the links it lists.
     *
     * <p>Each returned map holds the keys {@code channelName} (text of the first
     * {@code h4} in the page's {@code nav-sort-info} element), {@code linkUrl}
     * (the anchor's {@code href}) and {@code name} (the anchor's text). Pages that
     * cannot be fetched, or that contain no {@code nav-sort-info} element, are
     * skipped.
     *
     * @param list URLs of the pages to visit
     * @return one map per anchor found, in visiting order
     */
    public static List<Map> getDetailUrl(List<String> list) {
        List<Map> mapList = new ArrayList<>();
        for (String url : list) {
            Document document = getDocument(url);
            if (document == null) {
                continue;
            }

            Elements sortInfo = document.select("[class=nav-sort-info]");
            if (sortInfo.isEmpty()) {
                // Guard: get(0) below would throw on an empty selection.
                continue;
            }

            String channelName = sortInfo.get(0).select("h4").text();
            System.out.println("channelName:" + channelName);

            Elements anchors = sortInfo.select("[class=nav-sort-body clearfix]").select("a");
            for (Element anchor : anchors) {
                // Values are Object because main() later stores an Integer channelId.
                Map<String, Object> map = new HashMap<>();
                String linkUrl = anchor.attr("href");
                String name = anchor.text();
                System.out.println("linkUrl:" + linkUrl);
                System.out.println("name:" + name);
                map.put("channelName", channelName);
                map.put("linkUrl", linkUrl);
                map.put("name", name);
                mapList.add(map);
            }
        }
        return mapList;
    }

    /**
     * Crawls the site and stores every link. A channel row is created first when
     * {@link FriendLinkDao#getChannelId(String)} reports {@code -1} for its name.
     *
     * @param args unused
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static void main(String[] args) {
        List<Map> list = getDetailUrl(getEveryOtherUrl());
        FriendLinkDao friendDao = new FriendLinkDao();
        for (Map map : list) {
            String channelName = map.get("channelName").toString();
            Integer channelId = friendDao.getChannelId(channelName);
            if (channelId == -1) {
                // Unknown channel: insert it, then read back its id.
                friendDao.insertChannelName(channelName, 1);
                channelId = friendDao.getChannelId(channelName);
            }
            System.out.println("channelId: " + channelId);
            map.put("channelId", channelId);
            map.put("stat", "1");
            friendDao.insertFriendLink(map);
        }
    }
}

// ---- com/dao/FriendLinkDao.java ----
package com.dao;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.HashSet;
import java.util.Date;
import java.util.Map;
import java.util.Set;

import com.util.ConnectUtil;

/**
 * Data-access object for the {@code t_zsff_friend_link_channel} and
 * {@code t_zsff_friend_link} tables.
 */
public class FriendLinkDao {

    /** Connection obtained from {@link ConnectUtil#getConn()} when this DAO is created. */
    public Connection conn = ConnectUtil.getConn();

    /**
     * Looks up the id of the channel named {@code channelName}.
     *
     * @param channelName value matched against the {@code channel_name} column
     * @return the id of the last matching row, or {@code -1} when no row matches
     *         or the query fails with an {@link SQLException}
     */
    public Integer getChannelId(String channelName) {
        Integer id = -1;
        String sql = "SELECT id FROM t_zsff_friend_link_channel WHERE channel_name = ?";
        // try-with-resources closes the statement (and its result set) on every path.
        try (PreparedStatement ptmt = conn.prepareStatement(sql)) {
            ptmt.setString(1, channelName);
            try (ResultSet rs = ptmt.executeQuery()) {
                while (rs.next()) {
                    id = rs.getInt("id");
                }
            }
            return id;
        } catch (SQLException e) {
            e.printStackTrace();
            return id; // Still -1 unless a row was read before the database error.
        }
    }

    /**
     * Inserts a row into {@code t_zsff_friend_link_channel}. Failures are printed
     * to stderr and not propagated.
     *
     * @param channelName value for the {@code channel_name} column
     * @param pid value for the {@code pid} column
     */
    public void insertChannelName(String channelName, Integer pid) {
        String sql = "INSERT INTO t_zsff_friend_link_channel (channel_name, pid) VALUES (?, ?)";
        try (PreparedStatement ptmt = conn.prepareStatement(sql)) {
            ptmt.setObject(1, channelName);
            ptmt.setObject(2, pid);
            ptmt.executeUpdate();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Inserts a row into {@code t_zsff_friend_link}. Failures are printed to
     * stderr and not propagated.
     *
     * @param map source of the column values; read under the keys {@code name},
     *            {@code channelId}, {@code linkUrl} and {@code stat}
     */
    public void insertFriendLink(Map<?, ?> map) {
        String sql = "INSERT INTO t_zsff_friend_link (name, channel_id, link_url, stat) VALUES (?, ?, ?, ?)";
        try (PreparedStatement ptmt = conn.prepareStatement(sql)) {
            ptmt.setObject(1, map.get("name"));
            ptmt.setObject(2, map.get("channelId"));
            ptmt.setObject(3, map.get("linkUrl"));
            ptmt.setObject(4, map.get("stat"));
            ptmt.executeUpdate();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

// ---- com/util/ConnectUtil.java ----
package com.util;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

/** Opens JDBC connections to the MySQL {@code test} database. */
public class ConnectUtil {

    private static Connection conn;

    /**
     * Opens a new connection and stores it in the static field.
     *
     * @return the new connection; when the driver class is missing or the
     *         connection attempt fails, the previously stored value is returned
     *         ({@code null} if there was none) after printing the stack trace
     */
    public static Connection getConn() {
        try {
            // 1. Load the MySQL JDBC driver class.
            Class.forName("com.mysql.jdbc.Driver");
            // 2. Location and name of the database to access.
            // NOTE(review): host is 127.0.0.7 — possibly meant 127.0.0.1; confirm.
            String url = "jdbc:mysql://127.0.0.7:3306/test?characterEncoding=UTF-8";
            // 3. Database user name and password.
            String username = "root";
            String password = "root";
            // 4. Connect through the driver manager.
            conn = DriverManager.getConnection(url, username, password);
        } catch (ClassNotFoundException | SQLException e) {
            e.printStackTrace();
        }
        return conn;
    }

    /**
     * Replaces the stored connection.
     *
     * @param conn1 connection to store in the static field
     */
    public void setConn(Connection conn1) {
        conn = conn1;
    }
}

<dependency>
    <!-- jsoup HTML parser library @ https://jsoup.org/ -->
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.13.1</version>
</dependency>
總結
以上是生活随笔為你收集整理的基于Jsoup爬虫Demo的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: JDK5.0新特性之:泛型
- 下一篇: golang 没有名字参数_Go 返回参