Java多线程:示例代码
生活随笔
收集整理的這篇文章主要介紹了《Java多线程:示例代码》,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
代碼
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.*;
import java.util.concurrent.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Collects the article links of a CSDN blog from its article-list pages and then requests every
 * collected link from several worker threads.
 *
 * <p>Usage: if the blog uses a custom domain, put the domain prefix into {@link #userId} and run
 * {@link #main(String[])}.
 */
public class UrlCrawBroke {

    /** Number of article-list pages to scan (pages 1 to {@code maxPages}, inclusive). */
    static int maxPages = 20;

    /** Blog home name, e.g. for https://hanquan.blog.csdn.net/ this is {@code hanquan}. */
    static String userId = "hanquan";

    /** Prefix of the article-list URL; the page number is appended to it. */
    static final String homeUrl = "https://" + userId + ".blog.csdn.net/article/list/";

    /** Every distinct article URL found so far. */
    static Set<String> urlSet = new HashSet<>();

    /** Browser-like User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36";

    /** Connect/read timeout so that a stalled server cannot block a thread forever. */
    private static final int TIMEOUT_MILLIS = 10_000;

    /** Pause before each list-page request. */
    private static final long PAGE_DELAY_MILLIS = 500;

    /**
     * Downloads list pages 1..{@link #maxPages} and adds every article URL found to
     * {@link #urlSet}.
     *
     * @throws IOException if a page cannot be fetched or read
     * @throws InterruptedException if the thread is interrupted during the pause between pages
     */
    public static void getUrls() throws IOException, InterruptedException {
        // Build the pattern from userId so that changing userId is enough to crawl another blog;
        // the host part is quoted so that its dots only match literal dots.
        String articleRegex = "(?<=href=\")"
                + Pattern.quote("https://" + userId + ".blog.csdn.net/article/details/")
                + "[0-9]{8,9}(?=\")";

        // Inclusive upper bound: maxPages is the number of pages to scan.
        for (int page = 1; page <= maxPages; page++) {
            Thread.sleep(PAGE_DELAY_MILLIS);
            System.out.println("正在查找第 " + page + " 頁中的博客地址");
            String pageUrl = homeUrl + page;
            System.out.println(pageUrl);

            // Full HTML source of one list page; the stream is closed as soon as it is read.
            String pageHtml;
            try (InputStream in = doGet(pageUrl)) {
                pageHtml = inputStreamToString(in, "UTF-8");
            }

            List<String> found = getMatherSubstrs(pageHtml, articleRegex);
            urlSet.addAll(found);
            System.out.println("加入 " + found.size() + " 個url");
        }
    }

    /**
     * Collects the article links, prints them, then visits them from several threads.
     *
     * @param args unused
     * @throws IOException if collecting the links fails
     * @throws InterruptedException if collecting the links is interrupted
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        // Walk every list page and gather the article links.
        getUrls();

        // Print every link.
        System.out.println("打印每個鏈接");
        for (String s : urlSet) {
            System.out.println(s);
        }
        System.out.println("打印每個鏈接完畢");

        // Visit every link from several threads; each task walks its own shuffled copy.
        ExecutorService executor = Executors.newCachedThreadPool();
        int threadCount = 5; // number of concurrent tasks
        try {
            for (int i = 0; i < threadCount; i++) {
                executor.execute(new MyThread(urlSet));
            }
        } finally {
            executor.shutdown();
        }
    }

    /**
     * Opens an HTTP GET request to the given URL with a browser-like User-Agent header.
     *
     * @param urlstr absolute URL to request
     * @return the response body stream; the caller is responsible for closing it
     * @throws IOException if the URL is malformed, the connection fails or times out
     */
    public static InputStream doGet(String urlstr) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) new URL(urlstr).openConnection();
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        conn.setReadTimeout(TIMEOUT_MILLIS);
        conn.setRequestProperty("User-Agent", USER_AGENT);
        return conn.getInputStream();
    }

    /**
     * Reads the stream to its end and decodes the bytes with the given charset.
     *
     * <p>All bytes are collected before decoding, so a multi-byte character that spans two reads
     * is decoded correctly. The stream is not closed by this method.
     *
     * @param is stream to read
     * @param charset name of the charset used for decoding, e.g. {@code "UTF-8"}
     * @return the decoded content
     * @throws IOException if reading fails or the charset name is not supported
     */
    public static String inputStreamToString(InputStream is, String charset) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] chunk = new byte[1024];
        int read;
        while ((read = is.read(chunk)) != -1) {
            collected.write(chunk, 0, read);
        }
        return collected.toString(charset);
    }

    /**
     * Returns every substring of {@code str} matched by {@code regex}, in order of appearance.
     *
     * @param str text to search
     * @param regex regular expression to look for
     * @return the matches; empty when there are none
     */
    public static List<String> getMatherSubstrs(String str, String regex) {
        List<String> matches = new ArrayList<>();
        Matcher m = Pattern.compile(regex).matcher(str);
        while (m.find()) {
            matches.add(m.group());
        }
        return matches;
    }
}

/** Task that requests every URL of its own shuffled copy of a URL set, one after another. */
class MyThread implements Runnable {

    /** URLs to visit, in this task's order. */
    public List<String> urlList;

    /**
     * @param urls URLs to visit; they are copied and shuffled, the given set is not modified
     */
    public MyThread(Set<String> urls) {
        List<String> shuffled = new ArrayList<>(urls);
        Collections.shuffle(shuffled);
        this.urlList = shuffled;
    }

    /** Requests each URL in turn; a failed request is reported and the loop continues. */
    @Override
    public void run() {
        int i = 0;
        for (String s : urlList) {
            // try-with-resources closes the response stream after each request.
            try (InputStream ignored = doGet(s)) {
                System.out.println(Thread.currentThread().getName() + "成功訪問第" + (++i) + "個鏈接,共" + urlList.size() + "個:" + s);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Opens an HTTP GET request to the given URL; delegates to {@link UrlCrawBroke#doGet(String)}.
     *
     * @param urlstr absolute URL to request
     * @return the response body stream; the caller is responsible for closing it
     * @throws IOException if the request fails
     */
    public static InputStream doGet(String urlstr) throws IOException {
        return UrlCrawBroke.doGet(urlstr);
    }
}
// End of the example.
總結
以上是生活随笔為你收集整理的Java多线程:示例代码的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 左神算法:二叉树的最大 / 最小深度(普
- 下一篇: 面试必会系列 - 4.1 程序员必须掌握