前面的话

  本文将使用 Node.js 实现一个简单的网页爬虫功能

 

网页源码

  使用http.get()方法获取网页源码,以hao123网站的头条页面为例

http://tuijian.hao123.com/hotrank
// Fetch the hao123 hot-rank page over plain HTTP and print its HTML source.
var http = require('http');
http.get('http://tuijian.hao123.com/hotrank', function (res) {
    // Accumulates the decoded response body across 'data' events.
    var data = '';
    // Decode at the stream level: without this each chunk is a Buffer that is
    // stringified on its own, so a multi-byte UTF-8 character split across two
    // chunks would be turned into replacement characters.
    res.setEncoding('utf8');
    res.on('data', function (chunk) {
        data += chunk;
    });
    res.on('end', function () {
        console.log(data);
    });
    // Report a failure that happens while the body is still being received
    // instead of letting an unhandled 'error' event terminate the process.
    res.on('error', function (err) {
        console.error('Response failed: ' + err.message);
    });
}).on('error', function (err) {
    // Report a failure that happens before any response arrives
    // (for example a DNS lookup failure or a refused connection).
    console.error('Request failed: ' + err.message);
});

  获得的结果如下所示:

<!DOCTYPE html><html lang="zh_CN" bigrender="fe:widget/js/lib/bigRender.js">
<head><noscript><meta http-equiv="refresh" content="0; URL='/tuijian/hotrank?__noscript__-=1'" /></noscript>

<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no">
<meta name="referrer" content="always">







<meta name="baidu-site-verification" content="cCHudceyEP" />
<meta name="baidu_union_verify" content="d7d644c8a5cb51b46c900d802d906116">
<meta name="keywords" content="头条,新闻,推荐,国内,国际,本地,财经,军事,娱乐,体育,社会,汽车,网站,新闻导航,今日头条,头条新闻,最新新闻,2017最新新闻,新闻名站,新闻大全,新闻门户" />
<meta name="description" content="hao123新闻频道,聚合全网最新的新闻,最热点的新闻,实时新闻热搜词,热门新闻报刊;更有军事新闻,娱乐新闻,体育新闻,图片新闻,汽车新闻,女性新闻,财经新闻,国内新闻,国际新闻等分类新闻" />


<title>热点排行榜-头条新闻-hao123新闻导航_hao123上网导航</title>




<link rel="shortcut icon" href="//www.hao123.com/favicon.ico" />











<script>
        
         window.pageId = window.pageId || "hao123-xinwen-tuijian-hotrank";
         window.pageVP = window.pageVP || "hao123-xinwen-tuijian-hotrank";
        </script>
<!--[if lt IE 7]>
<script src="http://s0.hao123img.com/res/js/common/dd_belatedpng.min.js"></script>
<script>
                DD_belatedPNG.fix('#channelTitle');
            </script>
<![endif]-->
<script>window.HAO=window.HAO||{};window.HAO.https = false;window.HAO.httpsTrans = function(url){return url};</script>




<link rel="stylesheet" type="text/css" href="http://s1.hao123img.com/resource/fe/pkg/aio-eef856ab5.231bb088c.css" /><link rel="stylesheet" type="text/css" href="http://s2.hao123img.com/resource/tuijian/css/hotrank.38645dd.css" /><link rel="stylesheet" type="text/css" href="http://s1.hao123img.com/resource/fe/widget/ui/header/common/v2/header.8d1d978b0.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/fe/widget/ui/header/common/v2/logo/logo.6cca09af6.css" /><link rel="stylesheet" type="text/css" href="http://s1.hao123img.com/resource/fe/widget/ui/header/common/v2/sitemap/sitemap.a0832ac19.css" /><link rel="stylesheet" type="text/css" href="http://s2.hao123img.com/resource/fe/widget/ui/header/common/v2/adv/adv.25330c25d.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/fe/widget/ui/header/common/v2/form/form.deba0d4c0.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/fe/widget/ui/header/common/v2/tools/tools.1c81d5fc6.css" /><link rel="stylesheet" type="text/css" href="http://s2.hao123img.com/resource/fe/widget/ui/header/common/v2/nav/nav.0c7877e81.css" /><link rel="stylesheet" type="text/css" href="http://s2.hao123img.com/resource/fe/widget/ui/header/common/v2/tuiguang/tuiguang.6e9548c75.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/tuijian/widget/index/hotrank/hotrank.38645dd.css" /><link rel="stylesheet" type="text/css" href="http://s2.hao123img.com/resource/tuijian/widget/index/hotrank/index/slider/slider.3f6d691.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/tuijian/widget/index/hotrank/common/slider/slider.4d7a174.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/tuijian/widget/index/hotrank/index/news/news.9e71d5b.css" /><link rel="stylesheet" type="text/css" 
href="http://s1.hao123img.com/resource/tuijian/widget/index/hotrank/index/fyb/fyb.b016c1d.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/tuijian/widget/index/hotrank/index/top/top.e073b71.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/tuijian/widget/lift/lift.77f7c66.css" /><link rel="stylesheet" type="text/css" href="http://s2.hao123img.com/resource/fe/pkg/aio-8155b5719.3dd99d32e.css" /><script type="text/javascript">window.aid = "nWRkrj61PjnYriYYrHfsrHbsnHb";</script></head><body>


<div  >热点排行</a></li></ul></div></div></div></div></div>


<div >
<div class="mains">
<div ></a></div></div></div></div></div></div></div></div>

</div>
</div>
<div class="lift-fixed" >>返回顶部</a></div></div></div>









<div class="c-footer w1190 " monkey="erji-footer"><div class="container"><div class="left-wrap clearfix"><a href="https://www.hao123.com" class="logo" target="_blank" monkey="logo">hao123 上网导航第一品牌</a><div class="bottom-nav" monkey="nav"><a href="http://www.hao123.com/abouthao123.htm" class="item" target="_target">关于我们</a><a href="http://www.hao123.com/redian/problem.htm" class="item" target="_taregt">常见问题</a><a href="http://www.hao123.com/feedback" class="item" target="_blank">反馈意见</a><a href="http://www.hao123.com/sitemap" class="item" target="_blank">全站地图</a><span class="item">京ICP证030173号</span></div></div><div class="right-wrap"><div class="extend-nav clearfix" monkey="tools"><a href="http://www.hao123.com/shouji" class="item" target="_blank"><i class="icon-phone"></i><span>下载<br>手机端</span></a><a href="javascript:;" class="item" target="_black" data-hook="addBookmark"><i class="icon-collect"></i><span>收藏<br>本站</span></a></div></div></div></div>




<script type="text/javascript" src="http://s0.hao123img.com/res/js/common/as.min.js?_=2434"></script>










<script src="http://s2.hao123img.com/resource/fe/js/lib/main.5a7c104a8.js"></script>
<script>BigPipe.lazyPagelets = [];</script>
<script>BigPipe.loadedResource(["5a7c104a8_7959","d8b3cc9ac_29e3","38645dd_f7dd","8d1d978b0_a316","6cca09af6_f07f","a0832ac19_fb25","25330c25d_ce62","deba0d4c0_c8fe","1c81d5fc6_a695","0c7877e81_8719","6e9548c75_e646","38645dd_0f3e","3f6d691_9321","4d7a174_ccfc","9e71d5b_bed3","b016c1d_d1a3","e073b71_9403","77f7c66_45f3","95a138325_0731"]);</script><script>BigPipe.hooks["__cb_0_1"]=function(){'use strict';

            var $ = require('fe:widget/js/base/jquery.js');
            var fixreferrer = require('fe:widget/js/base/fixreferrer.js');
            HAO.https && fixreferrer.init($(document));
        };</script>
<script>BigPipe.hooks["__cb_0_2"]=function(){'use strict';
var $ = require('fe:widget/js/base/jquery.js');$('div[data-hook="sitemap"]').on('mouseenter', function (e) {$(this).addClass('sitemap-hover');}).on('mouseleave', function (e) {$(this).removeClass('sitemap-hover');});};</script>
<script>BigPipe.hooks["__cb_0_3"]=function(){'use strict';
var $ = require('fe:widget/js/base/jquery.js');var Search = require('fe:widget/js/base/search.js');var headerSearchInstance = new Search($('form[data-hook="search-form"]'));};</script>
<script>BigPipe.hooks["__cb_0_4"]=function(){'use strict';
var $ = require('fe:widget/js/base/jquery.js');var events = require('fe:widget/js/lib/events.js');var login = require('fe:widget/js/base/login.js');var sethome = require('fe:widget/js/base/sethome.js');var $loginCon = $('div[data-hook="c-header-login"]');var $loginDrop = $('div[js-hook="popup-list"]');login.init();events.on('loginSuccess', function(userinfo) {$loginCon.addClass('success');$loginCon.find('.key .word').html(userinfo.userName);/* if ($loginCon.find('.key .word').width() >= 60) {$loginCon.find('.key .word').width(50);$loginDrop.outerWidth($loginCon.outerWidth());}*/$('[data-hook=login]').removeAttr('data-hook');});$loginCon.mouseenter(function() {if($(this).hasClass('success')) {$(this).addClass('hover');}}).mouseleave(function() {$(this).removeClass('hover');});$('div[data-hhok="qrcode"]').on('mouseenter', function () {$(this).children('div').show();}).on('mouseleave', function () {$(this).children('div').hide();}).on('click', function (ev) {if ($(this).children('div').length > 0) {return false;}});if($('[data-hook=setHome]').length) {sethome.init();}};</script>
<script>BigPipe.hooks["__cb_0_5"]=function(){'use strict';
var $ = require('fe:widget/js/base/jquery.js');var popupWidth;$('div[data-hook="nav-more"]').on('mouseenter', function () {popupWidth = $(this).children('div').width();$(this).addClass('nav-more-hover');}).on('mouseleave', function () {$(this).removeClass('nav-more-hover');});};</script>
<script>BigPipe.hooks["__cb_0_6"]=function(){'use strict';
var $ = require('fe:widget/js/base/jquery.js');var $v2Header = $('#erjiV2Header');var $fixedNav = $('#fixedNav');if ($v2Header.hasClass('v2-fixed') && !($.browser.msie && $.browser.version < 7)) {var offHeight = 0;$(window).scroll(function () {offHeight = $v2Header.offset().top + 60;if ($(window).scrollTop() >= offHeight) {if (!$fixedNav.hasClass('nav-v2-fixed')) {$fixedNav.addClass('nav-v2-fixed').find('li.cur').removeClass('cur').addClass('cur');}}else if ($fixedNav.hasClass('nav-v2-fixed')) {$fixedNav.removeClass('nav-v2-fixed').find('li.cur').removeClass('cur').addClass('cur');}});}};</script>
<script>BigPipe.hooks["__cb_0_7"]=function(){'use strict';
var $ = require('fe:widget/js/base/jquery.js');var Slider = require('fe:widget/js/util/slider.js');new Slider($('.slider'));};</script>
<script>BigPipe.hooks["__cb_0_8"]=function(){'use strict';
if(typeof BAIDU_SS_HHRUN!='function'){var d=document;(d.getElementsByTagName('head')[0]||d.body).appendChild(d.createElement('script')).src='http://su.bdimg.com/static/dspui/js/ls.js?v='+~(-new Date()/5600e5)}else{BAIDU_SS_HHRUN()}};</script>
<script>BigPipe.hooks["__cb_0_9"]=function(){'use strict';
var lifttop = require('tuijian:widget/lift/lifttop.js');lifttop();};</script>
<script>BigPipe.hooks["__cb_0_10"]=function(){'use strict';

        window._bd_share_config = {
            common : {
                bdText : '',
                bdDesc : '',
                bdUrl : '',
                bdPic : ''
            },
            share : {
                "bdSize" : 24
            },
            selectShare : [{
                "bdselectMiniList" : ['tsina','weixin','qzone']
            }]
        };
        (document.getElementsByTagName('head')[0]||document.body)
        .appendChild(document.createElement('script')).src='http://bdimg.share.baidu.com/static/api/js/share.js?v=89860593.js?cdnversion='+~(-new Date());

        var shareEvent = require('tuijian:widget/index/content/shareEvent.js');
        shareEvent();
    };</script>
<script>BigPipe.hooks["__cb_0_11"]=function(){'use strict';
var addBookmark = require('fe:widget/js/base/addbookmark.js');addBookmark.init();};</script>
<script>BigPipe.hooks["__cb_0_12"]=function(){'use strict';

            
            (function initTrack(o){
                var d = document;
                var x = d.createElement("script");
                x.src = HAO.httpsTrans('http://s0.hao123img.com/res/js/track.js') + '?'+~(new Date/36e5);
                var a=[];
                if(o){
                  for(var i in o){
                    a.push(i + ":" + (o[i]))
                  }
                  var config = a.join(";");
                  x.setAttribute("data-log-config", config);
                  var s = d.getElementsByTagName("script")[0].parentNode;
                  var p= s || d.head;
                  if(p) {
                    setTimeout(function() {
                        p.appendChild(x)
                    }, 0);
                  }
                }
            })({
                  pageId: window.pageId,
                  page: window.pageId,
                  level: 2,
                  vp: window.pageVP || window.pageId,
                aid: window.aid || ''
            });
            
            window.js_track_loaded = function (success) {
                if (success) {
                    window.js_track_loaded = null;
                    if (window.aid) {
                        /* globals Monkey */
                        Monkey && Monkey.set && Monkey.set('aid', window.aid);
                    }
                }
            };

            // 跨站资源统计
            /* (function (doc) {
                var s = doc.createElement('script');
                s.src = HAO.httpsTrans('http://s0.hao123img.com/res/js/fe/cspalog.js') + '?t=' + (+new Date);
                var parent = doc.getElementsByTagName('script')[0].parentNode;
                parent.appendChild(s);
            })(document); */
        };</script>
<script>BigPipe.hooks["__cb_0_13"]=function(){'use strict';

    require.defer(["fe:widget/js/base/jquery.js","fe:widget/js/base/detect.js","tuijian:widget/index/kuaixun.js"], function ($, detect, kuaixun) {
        $(document).ready(function() {
            detect();
            kuaixun.init();
        });
    });
};</script>
<script>BigPipe.setResourceMap({"d8b3cc9ac_29e3":{"src":"http:\/\/s1.hao123img.com\/resource\/fe\/pkg\/aio-eef856ab5.231bb088c.css","type":"css","deps":[],"mods":["fe:resource\/css\/base.less"]},"38645dd_f7dd":{"src":"http:\/\/s2.hao123img.com\/resource\/tuijian\/css\/hotrank.38645dd.css","type":"css","deps":[],"mods":["tuijian:resource\/css\/hotrank.less"]},"8d1d978b0_a316":{"src":"http:\/\/s1.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/header.8d1d978b0.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/header.less"]},"6cca09af6_f07f":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/logo\/logo.6cca09af6.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/logo\/logo.less"]},"a0832ac19_fb25":{"src":"http:\/\/s1.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/sitemap\/sitemap.a0832ac19.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/sitemap\/sitemap.less"]},"25330c25d_ce62":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/adv\/adv.25330c25d.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/adv\/adv.less"]},"deba0d4c0_c8fe":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/form\/form.deba0d4c0.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/form\/form.less"]},"1c81d5fc6_a695":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/tools\/tools.1c81d5fc6.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/tools\/tools.less"]},"0c7877e81_8719":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/nav\/nav.0c7877e81.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/nav\/nav.less"]},"6e9548c75_e646":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/tuiguang\/tuiguang.6e9548c75.css","type":"css","deps":[],"mods":["fe:wi
dget\/ui\/header\/common\/v2\/tuiguang\/tuiguang.less"]},"38645dd_0f3e":{"src":"http:\/\/s0.hao123img.com\/resource\/tuijian\/widget\/index\/hotrank\/hotrank.38645dd.css","type":"css","deps":[],"mods":["tuijian:widget\/index\/hotrank\/hotrank.less"]},"3f6d691_9321":{"src":"http:\/\/s2.hao123img.com\/resource\/tuijian\/widget\/index\/hotrank\/index\/slider\/slider.3f6d691.css","type":"css","deps":[],"mods":["tuijian:widget\/index\/hotrank\/index\/slider\/slider.less"]},"4d7a174_ccfc":{"src":"http:\/\/s0.hao123img.com\/resource\/tuijian\/widget\/index\/hotrank\/common\/slider\/slider.4d7a174.css","type":"css","deps":[],"mods":["tuijian:widget\/index\/hotrank\/common\/slider\/slider.less"]},"9e71d5b_bed3":{"src":"http:\/\/s0.hao123img.com\/resource\/tuijian\/widget\/index\/hotrank\/index\/news\/news.9e71d5b.css","type":"css","deps":[],"mods":["tuijian:widget\/index\/hotrank\/index\/news\/news.less"]},"b016c1d_d1a3":{"src":"http:\/\/s1.hao123img.com\/resource\/tuijian\/widget\/index\/hotrank\/index\/fyb\/fyb.b016c1d.css","type":"css","deps":[],"mods":["tuijian:widget\/index\/hotrank\/index\/fyb\/fyb.less"]},"e073b71_9403":{"src":"http:\/\/s0.hao123img.com\/resource\/tuijian\/widget\/index\/hotrank\/index\/top\/top.e073b71.css","type":"css","deps":[],"mods":["tuijian:widget\/index\/hotrank\/index\/top\/top.less"]},"77f7c66_45f3":{"src":"http:\/\/s0.hao123img.com\/resource\/tuijian\/widget\/lift\/lift.77f7c66.css","type":"css","deps":[],"mods":["tuijian:widget\/lift\/lift.less"]},"95a138325_0731":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/pkg\/aio-8155b5719.3dd99d32e.css","type":"css","deps":[],"mods":["fe:widget\/ui\/footer\/common\/footer.less"]},"ed29b1dff_99f2":{"src":"http:\/\/s1.hao123img.com\/resource\/fe\/pkg\/aio-752ba7752.ed29b1dff.js","type":"js","deps":[],"mods":["fe:widget\/js\/base\/jquery.js"]},"499abaa0e_acda":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/pkg\/aio-eef856ab5.499abaa0e.js","type":"js","deps":["ed29b1dff_99f2","15f327f0a_5d72"],"mods
":["fe:widget\/js\/base\/browser.js","fe:widget\/js\/base\/fixreferrer.js"]},"15f327f0a_5d72":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/pkg\/aio-95cc3013d.15f327f0a.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["fe:widget\/js\/base\/cookie.js"]},"331938377_b942":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/pkg\/aio-1c2d6f9f2.2b182a527.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/header.less"]},"2009b1512_46d0":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/pkg\/aio-1c2d6f9f2.2009b1512.js","type":"js","deps":["ed29b1dff_99f2","15f327f0a_5d72","331938377_b942"],"mods":["fe:widget\/js\/base\/sethome.js","fe:widget\/js\/lib\/events.js","fe:widget\/js\/base\/login.js","fe:widget\/js\/third\/arttemplate\/template-native.js","fe:widget\/js\/base\/autocomplete.js","fe:widget\/js\/base\/search.js","fe:widget\/ui\/header\/common\/header.js"]},"9a092a7f1_2a6f":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/widget\/js\/util\/slider.9a092a7f1.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["fe:widget\/js\/util\/slider.js"]},"f271c78_c7d7":{"src":"http:\/\/s0.hao123img.com\/resource\/tuijian\/widget\/lift\/lifttop.f271c78.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["tuijian:widget\/lift\/lifttop.js"]},"4d39d64_93de":{"src":"http:\/\/s1.hao123img.com\/resource\/tuijian\/widget\/index\/content\/shareEvent.4d39d64.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["tuijian:widget\/index\/content\/shareEvent.js"]},"3ac67f28c_b365":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/pkg\/aio-8155b5719.3ac67f28c.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["fe:widget\/js\/base\/addbookmark.js"]},"67402ee5d_d72b":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/widget\/js\/base\/track.67402ee5d.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["fe:widget\/js\/base\/track.js"]},"f97e9ecfd_31c5":{"src":"http:\/\/s1.hao123img.com\/resource\/fe\/widget\/js\/base\/detect.f97e9ecfd.js","type":"js","deps":["67402ee5d_d72b"],"mods":["fe:widget\
/js\/base\/detect.js"]},"2e29525_fe44":{"src":"http:\/\/s1.hao123img.com\/resource\/tuijian\/widget\/index\/kuaixun.2e29525.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["tuijian:widget\/index\/kuaixun.js"]},"5a7c104a8_7959":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/js\/lib\/main.5a7c104a8.js","type":"js","deps":[],"mods":["fe:resource\/js\/lib\/main.js"]}});</script>
<script>BigPipe.onPageletArrive({"id":null,"children":[],"renderMode":"default","parent":null,"deps":{"beforedisplay":["d8b3cc9ac_29e3","38645dd_f7dd","8d1d978b0_a316","6cca09af6_f07f","a0832ac19_fb25","25330c25d_ce62","deba0d4c0_c8fe","1c81d5fc6_a695","0c7877e81_8719","6e9548c75_e646","38645dd_0f3e","3f6d691_9321","4d7a174_ccfc","9e71d5b_bed3","b016c1d_d1a3","e073b71_9403","77f7c66_45f3","95a138325_0731"],"load":["ed29b1dff_99f2","499abaa0e_acda","2009b1512_46d0","9a092a7f1_2a6f","f271c78_c7d7","4d39d64_93de","3ac67f28c_b365"]},"hooks":{"load":["__cb_0_1","__cb_0_2","__cb_0_3","__cb_0_4","__cb_0_5","__cb_0_6","__cb_0_7","__cb_0_8","__cb_0_9","__cb_0_10","__cb_0_11","__cb_0_12","__cb_0_13"]}});</script>
</body></html><!--24343361510346110218060803-->
<script> var _trace_page_logid = 2434336151; </script>
View Code

相关文章: