JAVA——基于HttpComponents(HttpClient)的简单网络爬虫DEMO
生活随笔
收集整理的這篇文章主要介紹了
JAVA——基于HttpComponents(HttpClient)的简单网络爬虫DEMO
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
基本概念
HttpComponents(HttpClient):
超文本傳輸協議(HTTP)可能是當今Internet上使用的最重要的協議。Web服務,支持網絡的設備和網絡計算的增長繼續將HTTP協議的作用擴展到用戶驅動的Web瀏覽器之外,同時增加了需要HTTP支持的應用程序的數量。
HttpComponents是為擴展而設計的,同時提供了對基本HTTP協議的強大支持,適合用于構建HTTP感知的客戶端和服務器應用程序(例如Web瀏覽器,Web Spider,HTTP代理,Web服務傳輸庫,或利用、擴展HTTP協議以進行分布式通信的系統)。
官網
官網地址:http://hc.apache.org/
Maven
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpcore -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpcore</artifactId>
    <version>4.4.10</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-collections4 -->
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-collections4</artifactId>
    <version>4.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>

源代碼
HTTPClientPool
package club.zstuca.httpclient;

import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;

import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.ssl.SSLContextBuilder;
import org.apache.http.ssl.TrustStrategy;

/**
 * Factory for HTTP/HTTPS clients that share one pooled connection manager.
 *
 * <p>HTTPS certificates are ignored: the trust strategy accepts every
 * certificate chain and the socket factory is built with
 * {@link NoopHostnameVerifier}. That is acceptable for a crawler demo only;
 * do not reuse this configuration where the peer's identity matters.
 */
public class HTTPClientPool {

    private static final String HTTP = "http";
    private static final String HTTPS = "https";

    /** Upper bound on the total number of pooled connections. */
    private static final int MAX_TOTAL_CONNECTIONS = 200;

    /** User-Agent header sent by every client handed out by this class. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36";

    /** Trust strategy that accepts every certificate chain (trust all sites). */
    private static final TrustStrategy TRUST_EVERYTHING = new TrustStrategy() {
        @Override
        public boolean isTrusted(X509Certificate[] x509Certificates, String s)
                throws CertificateException {
            return true;
        }
    };

    /** Socket factory used for the "https" scheme; stays null if static setup fails. */
    private static SSLConnectionSocketFactory sslConnectionSocketFactory = null;

    /** Connection pool manager; stays null if static setup fails. */
    private static PoolingHttpClientConnectionManager poolingHttpClientConnectionManager = null;

    static {
        try {
            // Context builder for HTTPS connections, loaded with the trust-all strategy.
            SSLContextBuilder contextBuilder =
                    new SSLContextBuilder().loadTrustMaterial(null, TRUST_EVERYTHING);

            // Other protocol names that could be listed here: "SSLv2Hello", "SSLv3", "TLSv1"
            String[] supportedProtocols = new String[] {"TLSv1.2"};
            sslConnectionSocketFactory = new SSLConnectionSocketFactory(
                    contextBuilder.build(),
                    supportedProtocols,
                    null,
                    NoopHostnameVerifier.INSTANCE);

            Registry<ConnectionSocketFactory> schemeRegistry =
                    RegistryBuilder.<ConnectionSocketFactory>create()
                            .register(HTTP, new PlainConnectionSocketFactory())
                            .register(HTTPS, sslConnectionSocketFactory)
                            .build();

            PoolingHttpClientConnectionManager pool =
                    new PoolingHttpClientConnectionManager(schemeRegistry);
            pool.setMaxTotal(MAX_TOTAL_CONNECTIONS);
            poolingHttpClientConnectionManager = pool;
        } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds a new client on top of the shared connection pool.
     *
     * <p>Each client gets its own empty {@link BasicCookieStore}; the connection
     * manager is flagged as shared on the builder.
     *
     * @return a freshly built client
     * @throws Exception declared for callers; kept for interface compatibility
     */
    public static CloseableHttpClient getHttpClient() throws Exception {
        return HttpClients.custom()
                .setSSLSocketFactory(sslConnectionSocketFactory)
                .setConnectionManager(poolingHttpClientConnectionManager)
                .setConnectionManagerShared(true)
                .setDefaultCookieStore(new BasicCookieStore())
                .setUserAgent(USER_AGENT)
                .build();
    }
}

Web Crawler
package club.zstuca.httpclient;import org.apache.http.*; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpRequestBase; import org.apache.http.client.utils.URIBuilder; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.util.EntityUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.util.StringUtils;import java.io.IOException; import java.net.URISyntaxException; import java.util.Map;/*** Http/Https請求的工具類*/ public class HTTPClientUtil {// 日志private static Logger logger = LoggerFactory.getLogger(HTTPClientUtil.class);// Request params default Configprivate static RequestConfig requestConfig = RequestConfig.custom().setConnectTimeout(5000).setConnectionRequestTimeout(5000).setSocketTimeout(5000).setRedirectsEnabled(false).build();// HttpClientprivate static CloseableHttpClient httpClient = null;// HTTP Requestprivate static HttpRequestBase httpRequest = null;// HTTP Responseprivate static CloseableHttpResponse httpResponse = null;/**** @param HttpRequestType* @param url* @param header* @param params* @param httpEntity* @return*/public static String doRequest(String HttpRequestType,String url,Map<String, String> header,Map<String, String> params,HttpEntity httpEntity) {String resultStr = "";if (StringUtils.isEmpty(url)) {return resultStr;}try {// Set GET paramssetHttpURIParams(url,params);// Set POST paramsif("POST".equals(HttpRequestType)&&httpEntity != null){((HttpPost)httpRequest).setEntity(httpEntity);}// Set HTTP headersetHttpHeader(header);// Send POSTsendHttpRequest();// ResponseresultStr = dealWithHttpResponse();} catch (Exception e) {e.printStackTrace();} finally {closeConnection();}return resultStr;}/*** 發送POST請求** @param url:請求地址* @param header:請求頭參數* @param httpEntity:表單參數 form提交 json/xml參數* 
@return*/public static String doPostRequest(String url, Map<String, String> header, HttpEntity httpEntity) {String resultStr = "";if (StringUtils.isEmpty(url)) {return resultStr;}try {getHttpRequest("POST");HttpPost httpPost = (HttpPost)httpRequest;httpPost.setURI(new URIBuilder(url).build());// Set HTTP headersetHttpHeader(header);// Set POST paramsif (httpEntity != null) {httpPost.setEntity(httpEntity);}sendHttpRequest();// ResponseresultStr = dealWithHttpResponse();} catch (Exception e) {e.printStackTrace();} finally {closeConnection();}return resultStr;}/*** 發送GET請求* @param url URL* @param header HTTP header info* @param params GET params* @return*/public static String doGetRequest(String url, Map<String, String> header, Map<String, String> params) {String resultStr = "";if (StringUtils.isEmpty(url)) {return resultStr;}try {// getHttpRequestgetHttpRequest("GET");// Set GET paramssetHttpURIParams(url,params);// Set HTTP headersetHttpHeader(header);// Send POSTsendHttpRequest();// ResponseresultStr = dealWithHttpResponse();} catch (Exception e) {e.printStackTrace();} finally {closeConnection();}return resultStr;}/**** @param HttpRequestType* @throws Exception*/private static void getHttpRequest(String HttpRequestType) throws Exception {httpClient = HTTPClientPool.getHttpClient();if("GET".equals(HttpRequestType)){httpRequest = new HttpGet();}else if("POST".equals(HttpRequestType)){httpRequest = new HttpPost();}}/**** @param header*/private static void setHttpHeader(Map<String, String> header){if (!(header == null || header.isEmpty())) {for (Map.Entry<String, String> headerEntry : header.entrySet()) {httpRequest.setHeader(headerEntry.getKey(), headerEntry.getValue());}}}/**** @param url* @param params* @throws URISyntaxException*/private static void setHttpURIParams(String url,Map<String, String> params) throws URISyntaxException {// URIBuilderURIBuilder urlbuilder = new URIBuilder(url);if (!(params == null || params.isEmpty())) {// Set GET paramsfor 
(Map.Entry<String, String> stringStringEntry : params.entrySet()) {urlbuilder.setParameter(stringStringEntry.getKey(), stringStringEntry.getValue());}}httpRequest.setURI(urlbuilder.build());}/**** @throws IOException*/private static void sendHttpRequest() throws IOException {// Request ConfighttpRequest.setConfig(requestConfig);// Send POSThttpResponse = httpClient.execute(httpRequest);return ;}/**** @return Response String UTF-8*/private static String dealWithHttpResponse(){String resultStr = "";try{if (httpResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {resultStr = EntityUtils.toString(httpResponse.getEntity(),"UTF-8");} else {StringBuffer stringBuffer = new StringBuffer();HeaderIterator headerIterator = httpResponse.headerIterator();while (headerIterator.hasNext()) {stringBuffer.append("\t" + headerIterator.next());}}}catch (IOException e) {e.printStackTrace();}return resultStr;}/*** 關掉連接釋放資源*/private static void closeConnection() {if (httpClient != null) {try {httpClient.close();} catch (IOException e) {e.printStackTrace();}}if (httpResponse != null) {try {httpResponse.close();} catch (IOException e) {e.printStackTrace();}}}}TEST?
package clua.zstuca;

import club.zstuca.httpclient.HTTPClientUtil;

import java.util.HashMap;

/**
 * Small manual demo: fetches two pages with {@link HTTPClientUtil} and prints
 * the response bodies (an empty line is printed when a request fails).
 */
public class HTTPTEST {

    public static void main(String[] args) {
        // Plain GET without headers or query parameters.
        String homePage = HTTPClientUtil.doGetRequest("http://www.baidu.com", null, null);
        System.out.println(homePage);

        // GET with one query parameter. A plain HashMap is used instead of
        // double-brace initialisation, which would create an anonymous subclass.
        HashMap<String, String> weatherParams = new HashMap<>();
        weatherParams.put("id", "101060101");
        String weather = HTTPClientUtil.doGetRequest("http://api.help.bj.cn/apis/weather/", null, weatherParams);
        System.out.println(weather);
    }
}

教學資源
https://www.bilibili.com/video/av68932809
參考文章
https://blog.csdn.net/qwe86314/article/details/91450098
總結
以上是生活随笔為你收集整理的JAVA——基于HttpComponents(HttpClient)的简单网络爬虫DEMO的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Spring Boot——[java.l
- 下一篇: Spring Boot——[JPA 无法