node实现爬取当前页面链接实现
生活随笔
收集整理的這篇文章主要介紹了
node实现爬取当前页面链接实现
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
首先說明:這是我在學習 node 的過程中做的一個小練習,想利用學過的幾個模塊(superagent、cheerio、fs、path),簡單實現一個爬取頁面鏈接的小工具。若有不足之處,希望大家多多指教。
// superagent: a convenient HTTP client request module for Node.js (similar to
// ajax); it handles GET, POST, PUT, DELETE and HEAD requests.
const superAgent = require('superagent');
// cheerio: a fast, flexible implementation of core jQuery designed for the server.
const cheerio = require('cheerio');
const fs = require('fs');
const path = require('path');

// Test link.
const testUrl = 'http://ah.10086.cn/m';

// Output directory. Anchored to the script location so that the directory
// that is created and the file that is written always agree, regardless of
// the current working directory.
const DATA_DIR = path.join(__dirname, 'data');

/**
 * Turn a page title into a string that is safe to use as a file name.
 * Characters that are path separators or are rejected by common file systems
 * are replaced with "_"; an empty result falls back to "untitled".
 *
 * @param {*} title - Page title (any value; coerced to a string).
 * @returns {string} A non-empty file-name-safe string (without extension).
 */
function toSafeFileName(title) {
  const cleaned = String(title == null ? '' : title)
    .replace(/[\\/:*?"<>|\u0000-\u001f]/g, '_')
    .trim();
  return cleaned === '' ? 'untitled' : cleaned;
}

/**
 * Fetch a page and collect every <a> element found in it.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<{title: string, linkArry: Array<{name: string, href: string}>, count: number}>}
 *   Resolves with the page title, the list of links (trimmed anchor text and
 *   href, '' when the attribute is missing) and the number of links.
 *   Rejects with the string '無效地址222' when the request fails, or with the
 *   thrown error when the response cannot be parsed.
 */
function getLinkByUrl(url) {
  return new Promise(function (resolve, reject) {
    // Use the caller-supplied url (the original always fetched testUrl).
    superAgent.get(url).end((err, res) => {
      if (err) {
        console.log('無效地址111');
        reject('無效地址222');
        return;
      }
      // An exception thrown inside this callback would otherwise escape the
      // promise and leave it pending forever.
      try {
        console.log('=========html=============\n ', res.text);
        const $ = cheerio.load(res.text);
        const linkArry = [];
        $('a').each(function (ind, element) {
          const href = $(element).attr('href') || '';
          const name = $(element).text().trim();
          linkArry.push({ name, href });
        });
        resolve({
          title: $('title').text(),
          linkArry,
          count: linkArry.length,
        });
      } catch (parseErr) {
        reject(parseErr);
      }
    });
  });
}

/**
 * Write the scraped data to "<script dir>/data/<title>.json".
 * The data directory is created when missing. An existing JSON file with the
 * same name is left untouched.
 *
 * @param {{title: string}} data - Object to serialise; `title` names the file.
 * @returns {Promise<string>} Resolves with a status message
 *   ('json文件創建成功!', 'json文件創建成功2!' or 'json文件存在!').
 *   Rejects with the mkdir error when the directory cannot be created, or
 *   with a 'json文件創建失敗…' string when the file cannot be written.
 */
function writeJsonFile(data) {
  return new Promise(function (resolve, reject) {
    const file = path.join(DATA_DIR, `${toSafeFileName(data.title)}.json`);
    const json = JSON.stringify(data);

    // Create first and treat EEXIST as "already there": this avoids the
    // deprecated, race-prone fs.exists() check-then-act sequence.
    fs.mkdir(DATA_DIR, function (mkdirErr) {
      if (mkdirErr && mkdirErr.code !== 'EEXIST') {
        console.log(mkdirErr);
        // The original only logged here, so the promise never settled.
        reject(mkdirErr);
        return;
      }

      const dirExisted = Boolean(mkdirErr);
      if (dirExisted) {
        console.log('data文件夾存在!');
      } else {
        console.log('data文件夾不存在。。。');
        console.log('文件夾創建成功');
      }

      // 'wx' fails with EEXIST instead of overwriting, which atomically
      // implements "only write when the file does not exist yet".
      fs.writeFile(file, json, { flag: 'wx' }, function (writeErr) {
        if (!writeErr) {
          if (dirExisted) {
            console.log('json文件不存在。。。');
            resolve('json文件創建成功2!');
          } else {
            resolve('json文件創建成功!');
          }
          return;
        }
        if (writeErr.code === 'EEXIST') {
          resolve('json文件存在!');
          return;
        }
        console.log(writeErr);
        reject(dirExisted ? 'json文件創建失敗2。。。' : 'json文件創建失敗。。。');
      });
    });
  });
}

// Driver: scrape the test page, then persist the result. The write step is
// chained inside the success handler so that a failed fetch does not fall
// through and log a bogus "resolve2= undefined".
getLinkByUrl(testUrl)
  .then(
    function (pageInfo) {
      console.log('=============resolve1=', pageInfo);
      return writeJsonFile(pageInfo).then(
        function (message) {
          console.log('===========resolve2=', message);
        },
        function (reason) {
          console.log('===========reject2=', reason);
        }
      );
    },
    function (reason) {
      console.log('=============reject1=', reason);
    }
  )
  .catch(function (err) {
    console.log('=========err=', err);
  });

// After running the script, the generated file content is as follows:
{"title": "安徽移動個人觸屏版網廳","linkArry": [{"name": "","href": "javascript:void\n\n(window.location.href='http://ah.10086.cn/mpad/pad/num/number_list.html');"},{"name": "","href": "javascript:void\n\n(window.location.href='http://ah.10086.cn/mpad/pad/num/number_list.html');"},{"name": "+充話費","href": ""},{"name": "+充流量","href": ""},{"name": "業務辦理","href": "http://ah.10086.cn/m/pages/pad/operate/openBusiIndex.html"},{"name": "手機賣場","href": "http://ah.10086.cn/mpad/pad/index.html"},{"name": "寬帶專區","href": "http://ah.10086.cn/m/pages/pad/kdzq/index.html"},{"name": "選號入網","href": "http://ah.10086.cn/mpad/pad/num/number_list.html"},{"name": "流量專區","href": "http://ah.10086.cn/m/pages/pad/operate/flowZQ/index.html"},{"name": "流量紅包","href": "http://ah.10086.cn/m/pages/draw/downloadkhd/downloadkhd.html?code=4&&WT.mc_ev=GXHXZY4"},{"name": "4G特惠","href": "http://ah.10086.cn/dt/khd"},{"name": "下載手廳","href": "http://ah.10086.cn/dt/khd"},{"name": "","href": "http://ah.10086.cn/dt/khd"},{"name": "","href": "http://ah.10086.cn/mpad/pad/act/haokarwy/index2.html"},{"name": "","href": "http://ah.10086.cn/mpad/hhg"},{"name": "","href": "http://ah.10086.cn/m/pages/draw/broadpromotion/index.html"},{"name": "","href": "http://ah.10086.cn/zsyyt/ahmobile/download/mobileDownLoadApk.do"},{"name": "","href": ""},{"name": "馬上下載","href": "http://ah.10086.cn/zsyyt/ahmobile/download/mobileDownLoadApk.do"}],"count": 19 }
轉載于:https://www.cnblogs.com/shichangchun/p/9700009.html
《新程序員》:云原生和全面數字化實踐50位技術專家共同創作,文字、視頻、音頻交互閱讀總結
以上是生活随笔為你收集整理的node实现爬取当前页面链接实现的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 简单使用gridlayout
- 下一篇: numpy常用函数