Node.js 结合 cheerio 实现简单爬虫
![]()
/**
 * Minimal scraper: downloads the home page at `url`, parses it with cheerio,
 * and collects the headline links found under `.mybox .main-1 .con_yw_1`
 * through `.con_yw_3` into `newslist` as `{ title, url }` records, which are
 * then printed to stdout.
 */
const cheerio = require("cheerio");
const fs = require("fs");
const util = require("util");
const https = require('https');

// Page to fetch; also the base against which relative hrefs are resolved.
const url = 'https://www.yidaiyilu.gov.cn/';

// Collected headlines, filled once the response has been fully received.
const newslist = [];

/**
 * Resolve an href found on the page to an absolute URL string.
 *
 * @param {string} href - Raw value of the anchor's `href` attribute.
 * @param {string} base - Absolute URL of the page the anchor was found on.
 * @returns {string|null} The absolute URL, or null if `href` cannot be parsed.
 */
function resolveLink(href, base) {
  try {
    // The URL constructor handles absolute hrefs, root-relative paths and
    // path-relative hrefs, all of which plain string concatenation gets wrong.
    return new URL(href, base).href;
  } catch (err) {
    return null;
  }
}

const req = https.request(url, (res) => {
  const { statusCode } = res;
  if (statusCode < 200 || statusCode >= 300) {
    console.error(`Request to ${url} failed with status ${statusCode}`);
    // Drain the response so the underlying socket is released.
    res.resume();
    return;
  }

  const chunks = [];
  res.on("data", (chunk) => {
    chunks.push(chunk);
  });
  res.on("error", (err) => {
    console.error(`Error while reading response from ${url}:`, err);
  });
  res.on("end", () => {
    // Concatenate before decoding so multi-byte characters split across
    // chunk boundaries are decoded correctly.
    const html = Buffer.concat(chunks).toString("utf8");
    const $ = cheerio.load(html);

    for (let i = 1; i <= 3; i++) {
      const dlist = `.con_yw_${i}`;
      $(".mybox .main-1").find(dlist).find('a').each((index, ele) => {
        const alink = $(ele).attr("href");
        // Anchors without an href carry no link to report.
        if (alink === undefined) {
          return;
        }
        const link = resolveLink(alink, url);
        if (link === null) {
          return;
        }
        newslist.push({ title: $(ele).text(), url: link });
      });
    }
    console.log(newslist);
  });
});

// Without this listener a DNS/TLS/connection failure would surface as an
// unhandled 'error' event and terminate the process with a stack trace.
req.on("error", (err) => {
  console.error(`Request to ${url} failed:`, err);
});
req.end();
View Code