【C#爬虫】抓取XX网站mp4资源地址
生活随笔
收集整理的這篇文章主要介紹了
【C#爬虫】抓取XX网站mp4资源地址
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
抓取小視頻的 URL 地址,然後將地址信息拷貝到迅雷裡批量下載即可。
主程序代碼
// Category name -> classid expected by the play endpoint:
//   yazhouqingseAV 35, zhifusiwaAV 29, zipaishipin 30, oumeiqingseAV 28,
//   katongdongman 31, tongxingAV 32, sanjidianying 33, fengkuangqunjiao 34
var client = new WinHttpHelper();
var type = "fengkuangqunjiao";
var classid = 34;

// The same pattern both finds item links on an index page and extracts the item id from each link.
string itemPattern = "" + type + "/(.*?).html";

// Walk index.html, index_2.html, index_3.html, ... until a page yields no item links.
// (The original condition `i > -1` only became false on int overflow, so the crawl never ended.)
for (int i = 1; ; i++)
{
    Console.WriteLine(i);

    // The first page has no numeric suffix.
    string index = i == 1 ? "" : "_" + i;
    string pageUrl = "http://www.lang34.com/se/" + type + "/index" + index + ".html";

    var trs = RegexHelper.GetMathList(client.GET(pageUrl, Encoding.UTF8), itemPattern);

    // GetMathList may return null on failure; an empty result means we ran past the last page
    // (or the request failed and the helper returned an error message instead of HTML).
    if (trs == null || trs.Count == 0)
    {
        break;
    }

    foreach (var item in trs)
    {
        string temp;
        if (!RegexHelper.GetMatchStr(item.ToString(), itemPattern, true, out temp))
        {
            continue;
        }

        string url = "http://www.lang34.com/e/DownSys/play/?classid=" + classid + "&id=" + temp + "&pathid=0";
        string htmltext = client.GET(url, Encoding.UTF8);

        string mp4;
        if (!RegexHelper.GetMatchStr(htmltext, "f:'(.*?)',", true, out mp4))
        {
            continue;
        }

        // The title is optional: if it is not found the line is still written, with an empty title.
        string titile;
        RegexHelper.GetMatchStr(htmltext, " <title>(.*?)</title>", true, out titile);

        string output = mp4 + "?title" + titile + "\r\n";
        Console.WriteLine(output);
        File.AppendAllText("D://" + type + ".txt", output);
    }
}

// ===== Network request class (WinHttpHelper) follows =====
using System;
using System.Collections.Generic;
using System.Text;

namespace MyHelper4Web
{
    /// <summary>
    /// Thin wrapper around the WinHttp.WinHttpRequest COM object for issuing GET and POST requests.
    /// </summary>
    /// <remarks>
    /// The byte-array overloads never throw: when a request fails, the exception's message and
    /// source are returned as the "response body" (encoded with <see cref="Encoding.Default"/>).
    /// Callers therefore cannot distinguish an error from a real response by type alone.
    /// </remarks>
    public class WinHttpHelper
    {
        private readonly WinHttp.WinHttpRequest request;

        /// <summary>Value sent in the Accept header.</summary>
        public string Accept = "*/*";

        /// <summary>Value sent in the User-Agent header.</summary>
        public string UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; InfoPath.2; .NET4.0E)";

        /// <summary>Value sent in the Content-Type header of POST requests.</summary>
        public string ContentType = "application/json"; // alternative: "application/x-www-form-urlencoded"

        /// <summary>Request timeout in seconds, passed to WaitForResponse.</summary>
        public int SetTimeOut = 60;

        /// <summary>Whether automatic redirects are allowed.</summary>
        public bool AllowAutoRedirect = true;

        /// <summary>Whether redirects from https to http are allowed.</summary>
        public bool AllowHttpstoHttp = false;

        /// <summary>
        /// Creates a helper with the default headers.
        /// </summary>
        public WinHttpHelper()
        {
            request = new WinHttp.WinHttpRequest();
        }

        /// <summary>
        /// Creates a helper with custom request headers.
        /// </summary>
        /// <param name="Accept">Accept header value.</param>
        /// <param name="UserAgent">User-Agent header value.</param>
        /// <param name="ContentType">Content-Type header value (used by POST).</param>
        public WinHttpHelper(string Accept, string UserAgent, string ContentType)
            : this() // the original forgot to create the request object, so every call failed
        {
            this.Accept = Accept;
            this.UserAgent = UserAgent;
            this.ContentType = ContentType;
        }

        /// <summary>
        /// Creates a helper with custom request headers and timeout.
        /// </summary>
        /// <param name="Accept">Accept header value.</param>
        /// <param name="UserAgent">User-Agent header value.</param>
        /// <param name="ContentType">Content-Type header value (used by POST).</param>
        /// <param name="SetTimeOut">Request timeout in seconds.</param>
        public WinHttpHelper(string Accept, string UserAgent, string ContentType, int SetTimeOut)
            : this(Accept, UserAgent, ContentType)
        {
            this.SetTimeOut = SetTimeOut;
        }

        /// <summary>
        /// Requests a page with GET.
        /// </summary>
        /// <param name="Url">URL to request.</param>
        /// <param name="refer">Referer header value; omitted when null or empty.</param>
        /// <returns>The response body as bytes, or the encoded error text if the request failed.</returns>
        public byte[] GET(string Url, string refer)
        {
            return SendRequest("GET", Url, "", refer, null);
        }

        /// <summary>
        /// Requests a page with GET and decodes the response.
        /// </summary>
        /// <param name="Url">URL to request.</param>
        /// <param name="Encode">Encoding used to decode the response bytes.</param>
        /// <returns>The response as a string, or error text if the request or decoding failed.</returns>
        public string GET(string Url, Encoding Encode)
        {
            string htmltext = "";
            try
            {
                byte[] htmlbyte = GET(Url, "");
                htmltext = Encode.GetString(htmlbyte);
            }
            catch (Exception ex)
            {
                htmltext = ex.Message + ex.Source;
            }
            return htmltext;
        }

        /// <summary>
        /// Requests a page with GET, sending a Referer header, and decodes the response.
        /// </summary>
        /// <param name="Url">URL to request.</param>
        /// <param name="refer">Referer header value; omitted when null or empty.</param>
        /// <param name="Encode">Encoding used to decode the response bytes.</param>
        /// <returns>The response as a string. Unlike the other string overloads, decoding errors are not caught.</returns>
        public string GET(string Url, string refer, Encoding Encode)
        {
            byte[] htmlbyte = GET(Url, refer);
            return Encode.GetString(htmlbyte);
        }

        /// <summary>
        /// Requests a page with POST.
        /// </summary>
        /// <param name="Url">URL to request.</param>
        /// <param name="PostData">Request body.</param>
        /// <param name="Refer">Referer header value; omitted when null or empty.</param>
        /// <returns>The response body as bytes, or the encoded error text if the request failed.</returns>
        public byte[] POST(string Url, string PostData, string Refer)
        {
            return SendRequest("POST", Url, PostData, Refer, ContentType);
        }

        /// <summary>
        /// Requests a page with POST, without a Referer header.
        /// </summary>
        /// <param name="Url">URL to request.</param>
        /// <param name="PostData">Request body.</param>
        /// <returns>The response body as bytes, or the encoded error text if the request failed.</returns>
        public byte[] POST(string Url, string PostData)
        {
            return POST(Url, PostData, "");
        }

        /// <summary>
        /// Requests a page with POST and decodes the response.
        /// </summary>
        /// <param name="Url">URL to request.</param>
        /// <param name="PostData">Request body.</param>
        /// <param name="Refer">Referer header value; omitted when null or empty.</param>
        /// <param name="Encode">Encoding used to decode the response bytes.</param>
        /// <returns>The response as a string, or error text if the request or decoding failed.</returns>
        public string POST(string Url, string PostData, string Refer, Encoding Encode)
        {
            string htmltext = string.Empty;
            try
            {
                byte[] responsebody = POST(Url, PostData, Refer);
                htmltext = Encode.GetString(responsebody);
            }
            catch (Exception ex)
            {
                htmltext = ex.Message + ex.Source;
            }
            return htmltext;
        }

        /// <summary>
        /// Requests a page with POST (no Referer header) and decodes the response.
        /// </summary>
        /// <param name="Url">URL to request.</param>
        /// <param name="PostData">Request body.</param>
        /// <param name="Encode">Encoding used to decode the response bytes.</param>
        /// <returns>The response as a string, or error text if the request or decoding failed.</returns>
        public string POST(string Url, string PostData, Encoding Encode)
        {
            return POST(Url, PostData, "", Encode);
        }

        /// <summary>
        /// Returns all response headers of the most recent request.
        /// </summary>
        /// <remarks>Despite the name, this returns every response header, not only cookies.</remarks>
        /// <returns>The raw header text, or an empty string if it cannot be read.</returns>
        public string GetAllCookis()
        {
            try
            {
                return request.GetAllResponseHeaders();
            }
            catch (Exception)
            {
                return "";
            }
        }

        /// <summary>
        /// Shared implementation of GET and POST: configures, sends and waits for one request.
        /// </summary>
        /// <param name="method">HTTP verb passed to Open.</param>
        /// <param name="url">URL to request.</param>
        /// <param name="body">Body passed to Send.</param>
        /// <param name="referer">Referer header value; omitted when null or empty.</param>
        /// <param name="contentType">Content-Type header value; omitted when null.</param>
        /// <returns>The response body, or the encoded exception text on failure.</returns>
        private byte[] SendRequest(string method, string url, string body, string referer, string contentType)
        {
            byte[] responsebody;
            try
            {
                // NOTE(review): these options are only ever switched away from their defaults and the
                // request object is reused, so flipping the fields back later presumably has no effect.
                if (AllowAutoRedirect == false)
                {
                    request.set_Option(WinHttp.WinHttpRequestOption.WinHttpRequestOption_EnableRedirects, false);
                }
                if (AllowHttpstoHttp == true)
                {
                    request.set_Option(WinHttp.WinHttpRequestOption.WinHttpRequestOption_EnableHttpsToHttpRedirects, true);
                }

                // Open asynchronously, then block in WaitForResponse so the timeout applies.
                request.Open(method, url, true);
                request.SetRequestHeader("Accept", Accept);
                request.SetRequestHeader("User-Agent", UserAgent);
                if (contentType != null)
                {
                    request.SetRequestHeader("Content-Type", contentType);
                }
                if (!string.IsNullOrEmpty(referer))
                {
                    request.SetRequestHeader("Referer", referer);
                }

                request.Send(body);
                request.WaitForResponse(SetTimeOut);
                responsebody = (byte[])request.ResponseBody;
            }
            catch (Exception ex)
            {
                // Best-effort contract kept from the original: report the failure as the response text.
                responsebody = Encoding.Default.GetBytes(ex.Message + ex.Source);
            }
            return responsebody;
        }
    }
}

// ===== Regular expression helper class (RegexHelper) follows =====
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace MyHelper4Web
{
    /// <summary>
    /// Regex helpers for pulling a single value, or every match, out of HTML text.
    /// All patterns are evaluated with Singleline and IgnoreCase.
    /// </summary>
    public class RegexHelper
    {
        private const RegexOptions MatchOptions = RegexOptions.Singleline | RegexOptions.IgnoreCase;

        /// <summary>
        /// Finds the first match of <paramref name="pattern"/> and returns either the captured
        /// text or the whole match.
        /// </summary>
        /// <param name="htmltext">Text to search.</param>
        /// <param name="pattern">Regex pattern, typically containing one "(.*?)" group.</param>
        /// <param name="isCut">
        /// true to return only the first capture group; false to return the whole matched text.
        /// </param>
        /// <param name="result">The extracted text, or an empty string when nothing usable was found.</param>
        /// <returns>true when a match was found and its captured text is non-empty.</returns>
        public static bool GetMatchStr(string htmltext, string pattern, bool isCut, out string result)
        {
            string whole;
            string captured;
            bool found = TryMatch(htmltext, pattern, out whole, out captured);

            // On failure always hand back "" (the original could return leftover pattern text here).
            result = found ? (isCut ? captured : whole) : "";
            return found;
        }

        /// <summary>
        /// Same as the four-argument GetMatchStr, but returns the text directly.
        /// </summary>
        /// <param name="htmltext">Text to search.</param>
        /// <param name="pattern">Regex pattern, typically containing one "(.*?)" group.</param>
        /// <param name="isCut">
        /// true to return only the first capture group; false to return the whole matched text.
        /// </param>
        /// <returns>The extracted text, or an empty string when nothing usable was found.</returns>
        public static string GetMatchString(string htmltext, string pattern, bool isCut)
        {
            string result;
            GetMatchStr(htmltext, pattern, isCut, out result);
            return result;
        }

        /// <summary>
        /// Finds the first match of <paramref name="pattern"/> and returns its captured text.
        /// </summary>
        /// <param name="htmltext">Text to search.</param>
        /// <param name="pattern">Regex pattern.</param>
        /// <param name="result">
        /// The first capture group when the pattern defines one, otherwise the whole match;
        /// an empty string when nothing usable was found.
        /// </param>
        /// <returns>true when a match was found and <paramref name="result"/> is non-empty.</returns>
        public static bool GetMatchStr(string htmltext, string pattern, out string result)
        {
            string whole;
            string captured;
            bool found = TryMatch(htmltext, pattern, out whole, out captured);
            result = found ? captured : "";
            return found;
        }

        /// <summary>
        /// Returns the text of every match of <paramref name="pattern"/> in <paramref name="htmltext"/>.
        /// </summary>
        /// <param name="htmltext">Text to search.</param>
        /// <param name="pattern">Regex pattern.</param>
        /// <returns>
        /// A list of matched strings (whole matches, not capture groups). The list is empty,
        /// never null, when there are no matches or the inputs are invalid.
        /// </returns>
        public static ArrayList GetMathList(string htmltext, string pattern)
        {
            ArrayList list = new ArrayList();
            if (htmltext == null || pattern == null)
            {
                return list;
            }

            try
            {
                foreach (Match match in Regex.Matches(htmltext, pattern, MatchOptions))
                {
                    list.Add(match.Value);
                }
            }
            catch (ArgumentException)
            {
                // Invalid pattern: report "no matches" rather than a partially filled list.
                return new ArrayList();
            }
            return list;
        }

        /// <summary>
        /// Runs the first-match search shared by all single-value overloads.
        /// </summary>
        /// <param name="htmltext">Text to search.</param>
        /// <param name="pattern">Regex pattern.</param>
        /// <param name="whole">The complete matched text, or "" when there is no match.</param>
        /// <param name="captured">
        /// The first capture group when the pattern defines one, otherwise the whole match.
        /// </param>
        /// <returns>true when a match exists and <paramref name="captured"/> is non-empty.</returns>
        private static bool TryMatch(string htmltext, string pattern, out string whole, out string captured)
        {
            whole = "";
            captured = "";
            if (htmltext == null || pattern == null)
            {
                return false;
            }

            Match match;
            try
            {
                match = Regex.Match(htmltext, pattern, MatchOptions);
            }
            catch (ArgumentException)
            {
                // Invalid pattern is treated as "no match", as the original swallowed all errors.
                return false;
            }

            if (!match.Success)
            {
                return false;
            }

            whole = match.Value;
            // Group 0 is the whole match; group 1 is the first explicit capture, e.g. "(.*?)".
            captured = match.Groups.Count > 1 ? match.Groups[1].Value : match.Value;

            // An empty capture counts as failure, as in the original.
            return !string.IsNullOrEmpty(captured);
        }
    }
}
總結
以上是生活随笔為你收集整理的【C#爬虫】抓取XX网站mp4资源地址的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: SD 模块与FICO、MM、PS、QM、
- 下一篇: CCNA培训(五)_20210725da