爬虫 spider05——使用httpclient发送get请求、post请求
生活随笔
收集整理的這篇文章主要介紹了
爬虫 spider05——使用httpclient发送get请求、post请求
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
百度解釋
HttpClient 是 Apache Jakarta Common 下的子項目,可以用來提供高效的、最新的、功能豐富的支持 HTTP 協議的客戶端編程工具包,并且它支持 HTTP 協議最新的版本和建議。
httpClient 是專為 Java 發送 HTTP 請求而生的。如果要使用 httpClient,需要先進行導包:
? ? ? ??
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.4</version>
</dependency>

Get
import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.StatusLine; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils;import java.io.IOException;public class HttpClientGet {public static void main(String[] args) throws IOException {//請求地址String toUrl="http://www.manmanbuy.com";//封裝地址到get請求中HttpGet httpGet=new HttpGet(toUrl);//設置請求頭信息:User-AgenthttpGet.setHeader("User-Agent","Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36");//創(chuàng)建執(zhí)行請求對象CloseableHttpClient closeableHttpClient = HttpClients.createDefault();//執(zhí)行請求,獲取響應對象CloseableHttpResponse closeableHttpResponse = closeableHttpClient.execute(httpGet);//獲取http請求響應信息StatusLine statusLine = closeableHttpResponse.getStatusLine();//獲取狀態(tài)碼int respCode=statusLine.getStatusCode();System.out.println("respCode:"+respCode);//獲取響應的Content typeHeader[] headersType = closeableHttpResponse.getHeaders("Content-Type");System.out.println(headersType.length);System.out.println(headersType[0].getName()+"----"+headersType[0].getValue());//獲取響應數(shù)據,并封裝到實體對象HttpEntity httpEntity = closeableHttpResponse.getEntity();//從實體類中取出爬取的數(shù)據String pageMessage = EntityUtils.toString(httpEntity, "utf-8");//System.out.println(pageMessage);} }結果展示:
Post
import org.apache.http.Consts; import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.NameValuePair; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.message.BasicNameValuePair; import org.apache.http.util.EntityUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements;import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.List; /*** post請求登錄慢慢買網站*/ public class HttpClientPost {public static void main(String[] args) throws IOException {//登錄請求地址String toLogin="http://home.manmanbuy.com/login.aspx";//封裝請求HttpPost httpPost=new HttpPost(toLogin);//設置請求頭信息,模擬瀏覽器登錄//httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36");httpPost.setHeader("Referer","http://home.manmanbuy.com/login.aspx?tourl=http%3a%2f%2fwww.manmanbuy.com%2fdefault.aspx");//httpPost.setHeader("Content-Type","application/x-www-form-urlencoded");httpPost.setHeader("Cookie","_ga=GA1.2.1458535014.1567059656; _gid=GA1.2.1328820533.1570633933; Hm_lvt_85f48cee3e51cd48eaba80781b243db3=1570634150,1570709948,1570717507,1570756038; ASP.NET_SessionId=rjnpykzbfs1yyvmdrtclz455; Hm_lpvt_85f48cee3e51cd48eaba80781b243db3=1570763535; _gat_gtag_UA_145348783_1=1");httpPost.setHeader("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3");//封裝表單數(shù)據BasicNameValuePair txtName= new BasicNameValuePair("txtUser", "13699268827");BasicNameValuePair txtPass= new BasicNameValuePair("txtPass", "manmanmai");BasicNameValuePair autoLogin= new 
BasicNameValuePair("autoLogin", "on");BasicNameValuePair btnLogin= new BasicNameValuePair("btnLogin", "登陸");BasicNameValuePair viewState= new BasicNameValuePair("__VIEWSTATE", "/wEPDwULLTIwNjQ3Mzk2NDFkGAEFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYBBQlhdXRvTG9naW4voj01ABewCkGpFHsMsZvOn9mEZg==");BasicNameValuePair eventValidation= new BasicNameValuePair("__EVENTVALIDATION", "/wEWBQLW+t7HAwLB2tiHDgLKw6LdBQKWuuO2AgKC3IeGDJ4BlQgowBQGYQvtxzS54yrOdnbC");// 表單數(shù)據封裝到集合中List<NameValuePair> formParams = new ArrayList<NameValuePair>();formParams.add(txtName);formParams.add(txtPass);formParams.add(autoLogin);formParams.add(btnLogin);formParams.add(viewState);formParams.add(eventValidation);//把表單集合對象封裝到表單實體中UrlEncodedFormEntity urlEncodedFormEntity=new UrlEncodedFormEntity(formParams, Consts.UTF_8);//把表單實體封裝到post請求中httpPost.setEntity(urlEncodedFormEntity);//*******************************************************//創(chuàng)建請求對象CloseableHttpClient closeableHttpClient = HttpClients.createDefault();//用戶登錄成功之后的響應對象CloseableHttpResponse closeableHttpResponse = closeableHttpClient.execute(httpPost);int code=closeableHttpResponse.getStatusLine().getStatusCode();if(code==302){//狀態(tài)碼為302意味著登錄成功,并且服務器端做了重定向跳轉//通過響應對象獲取locationHeader[] locations = closeableHttpResponse.getHeaders("Location");String respUrl=locations[0].getValue();//爬取用戶中心頁面String toUserCenterPage="http://home.manmanbuy.com"+respUrl;//用戶中心get請求HttpGet httpGet=new HttpGet(toUserCenterPage);//--httpGet.setHeader("Referer","http://www.manmanbuy.com/default.aspx");//--//獲取第一次登錄成功后的cookieHeader[] headerCookies = closeableHttpResponse.getHeaders("Set-Cookie");System.out.println("headerCookies:"+headerCookies.length);//System.out.println(headerCookies[0].getName()+"----"+headerCookies[0].getValue());//獲取第一請求響應后的cookie,并把其cookie值賦予第二個請求httpGet.setHeader("Cookie",headerCookies[0].getValue());//Httpclient對象closeableHttpClient = HttpClients.createDefault();//執(zhí)行請求 user center pageCloseableHttpResponse 
closeableHttpResponseUserCenter = closeableHttpClient.execute(httpGet);//獲取頁面信息 user center pageHttpEntity userCenterEntity = closeableHttpResponseUserCenter.getEntity();//String userCenterPage = EntityUtils.toString(userCenterEntity, "gb2312");//把用戶中心的頁面轉換成document對象Document userCenterDocument = Jsoup.parse(userCenterPage);Elements integrals = userCenterDocument.select("#aspnetForm > div.udivright > div:nth-child(2) > table > tbody > tr > td:nth-child(1) > table:nth-child(2) > tbody > tr > td:nth-child(2) > div:nth-child(1) > font");System.out.println(integrals.text());}} }?
結果展示:
總結
總結: httpClient使用步驟:
- 導包
- 確定首頁URL
- 創建 httpClient 對象: HttpClients.createDefault();
- 設置請求方式: HttpGet HttpPost
- 設置請求參數和請求頭
post 設置參數:
```java
// Collect the form fields as name/value pairs.
List<NameValuePair> list = new ArrayList<NameValuePair>();
list.add(new BasicNameValuePair("username", "rose"));
list.add(new BasicNameValuePair("password", "123"));
// Encode explicitly as UTF-8; the single-argument constructor falls back to ISO-8859-1.
HttpEntity entity = new UrlEncodedFormEntity(list, Consts.UTF_8);
httpPost.setEntity(entity);
```
- 發送請求, 獲取響應對象: CloseableHttpResponse response = httpClient.execute(get/post)
- 獲取數據:
- 響應行 響應頭 響應體
?
?
總結
以上是生活随笔為你收集整理的爬虫 spider05——使用httpclient发送get请求、post请求的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 爬虫 spider02——详析http
- 下一篇: Java NIO:Buffer、Chan