前面的话
本文将使用 Node.js 实现一个简单的网页爬虫功能。
网页源码
使用 http.get() 方法获取网页源码，以 hao123 网站的头条页面为例：
http://tuijian.hao123.com/hotrank
// Fetch the HTML source of the hao123 hot-rank page and print it to stdout.
var http = require('http');

http.get('http://tuijian.hao123.com/hotrank', function (res) {
  var data = '';

  // Decode at the stream level: without this each chunk is a Buffer that is
  // converted to a string on its own, so a multi-byte UTF-8 character split
  // across two chunks would be turned into replacement characters.
  res.setEncoding('utf8');

  // Accumulate the body as it arrives.
  res.on('data', function (chunk) {
    data += chunk;
  });

  // The whole body has been received; print it.
  res.on('end', function () {
    console.log(data);
  });

  // The response stream can fail mid-transfer (e.g. the connection drops).
  res.on('error', function (err) {
    console.error('Failed while reading response: ' + err.message);
  });
}).on('error', function (err) {
  // Without a listener here, a DNS or connection failure is emitted as an
  // unhandled 'error' event and terminates the process.
  console.error('Request failed: ' + err.message);
});
获得的结果如下所示:
<!DOCTYPE html><html lang="zh_CN" bigrender="fe:widget/js/lib/bigRender.js"> <head><noscript><meta http-equiv="refresh" content="0; URL='/tuijian/hotrank?__noscript__-=1'" /></noscript> <meta charset="utf-8"> <meta http-equiv="X-UA-Compatible" content="IE=edge"> <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no"> <meta name="referrer" content="always"> <meta name="baidu-site-verification" content="cCHudceyEP" /> <meta name="baidu_union_verify" content="d7d644c8a5cb51b46c900d802d906116"> <meta name="keywords" content="头条,新闻,推荐,国内,国际,本地,财经,军事,娱乐,体育,社会,汽车,网站,新闻导航,今日头条,头条新闻,最新新闻,2017最新新闻,新闻名站,新闻大全,新闻门户" /> <meta name="description" content="hao123新闻频道,聚合全网最新的新闻,最热点的新闻,实时新闻热搜词,热门新闻报刊;更有军事新闻,娱乐新闻,体育新闻,图片新闻,汽车新闻,女性新闻,财经新闻,国内新闻,国际新闻等分类新闻" /> <title>热点排行榜-头条新闻-hao123新闻导航_hao123上网导航</title> <link rel="shortcut icon" href="//www.hao123.com/favicon.ico" /> <script> window.pageId = window.pageId || "hao123-xinwen-tuijian-hotrank"; window.pageVP = window.pageVP || "hao123-xinwen-tuijian-hotrank"; </script> <!--[if lt IE 7]> <script src="http://s0.hao123img.com/res/js/common/dd_belatedpng.min.js"></script> <script> DD_belatedPNG.fix('#channelTitle'); </script> <![endif]--> <script>window.HAO=window.HAO||{};window.HAO.https = false;window.HAO.httpsTrans = function(url){return url};</script> <link rel="stylesheet" type="text/css" href="http://s1.hao123img.com/resource/fe/pkg/aio-eef856ab5.231bb088c.css" /><link rel="stylesheet" type="text/css" href="http://s2.hao123img.com/resource/tuijian/css/hotrank.38645dd.css" /><link rel="stylesheet" type="text/css" href="http://s1.hao123img.com/resource/fe/widget/ui/header/common/v2/header.8d1d978b0.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/fe/widget/ui/header/common/v2/logo/logo.6cca09af6.css" /><link rel="stylesheet" type="text/css" href="http://s1.hao123img.com/resource/fe/widget/ui/header/common/v2/sitemap/sitemap.a0832ac19.css" /><link 
rel="stylesheet" type="text/css" href="http://s2.hao123img.com/resource/fe/widget/ui/header/common/v2/adv/adv.25330c25d.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/fe/widget/ui/header/common/v2/form/form.deba0d4c0.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/fe/widget/ui/header/common/v2/tools/tools.1c81d5fc6.css" /><link rel="stylesheet" type="text/css" href="http://s2.hao123img.com/resource/fe/widget/ui/header/common/v2/nav/nav.0c7877e81.css" /><link rel="stylesheet" type="text/css" href="http://s2.hao123img.com/resource/fe/widget/ui/header/common/v2/tuiguang/tuiguang.6e9548c75.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/tuijian/widget/index/hotrank/hotrank.38645dd.css" /><link rel="stylesheet" type="text/css" href="http://s2.hao123img.com/resource/tuijian/widget/index/hotrank/index/slider/slider.3f6d691.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/tuijian/widget/index/hotrank/common/slider/slider.4d7a174.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/tuijian/widget/index/hotrank/index/news/news.9e71d5b.css" /><link rel="stylesheet" type="text/css" href="http://s1.hao123img.com/resource/tuijian/widget/index/hotrank/index/fyb/fyb.b016c1d.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/tuijian/widget/index/hotrank/index/top/top.e073b71.css" /><link rel="stylesheet" type="text/css" href="http://s0.hao123img.com/resource/tuijian/widget/lift/lift.77f7c66.css" /><link rel="stylesheet" type="text/css" href="http://s2.hao123img.com/resource/fe/pkg/aio-8155b5719.3dd99d32e.css" /><script type="text/javascript">window.aid = "nWRkrj61PjnYriYYrHfsrHbsnHb";</script></head><body> <div >热点排行</a></li></ul></div></div></div></div></div> <div > <div class="mains"> <div ></a></div></div></div></div></div></div></div></div> </div> </div> <div 
class="lift-fixed" >>返回顶部</a></div></div></div> <div class="c-footer w1190 " monkey="erji-footer"><div class="container"><div class="left-wrap clearfix"><a href="https://www.hao123.com" class="logo" target="_blank" monkey="logo">hao123 上网导航第一品牌</a><div class="bottom-nav" monkey="nav"><a href="http://www.hao123.com/abouthao123.htm" class="item" target="_target">关于我们</a><a href="http://www.hao123.com/redian/problem.htm" class="item" target="_taregt">常见问题</a><a href="http://www.hao123.com/feedback" class="item" target="_blank">反馈意见</a><a href="http://www.hao123.com/sitemap" class="item" target="_blank">全站地图</a><span class="item">京ICP证030173号</span></div></div><div class="right-wrap"><div class="extend-nav clearfix" monkey="tools"><a href="http://www.hao123.com/shouji" class="item" target="_blank"><i class="icon-phone"></i><span>下载<br>手机端</span></a><a href="javascript:;" class="item" target="_black" data-hook="addBookmark"><i class="icon-collect"></i><span>收藏<br>本站</span></a></div></div></div></div> <script type="text/javascript" src="http://s0.hao123img.com/res/js/common/as.min.js?_=2434"></script> <script src="http://s2.hao123img.com/resource/fe/js/lib/main.5a7c104a8.js"></script> <script>BigPipe.lazyPagelets = [];</script> <script>BigPipe.loadedResource(["5a7c104a8_7959","d8b3cc9ac_29e3","38645dd_f7dd","8d1d978b0_a316","6cca09af6_f07f","a0832ac19_fb25","25330c25d_ce62","deba0d4c0_c8fe","1c81d5fc6_a695","0c7877e81_8719","6e9548c75_e646","38645dd_0f3e","3f6d691_9321","4d7a174_ccfc","9e71d5b_bed3","b016c1d_d1a3","e073b71_9403","77f7c66_45f3","95a138325_0731"]);</script><script>BigPipe.hooks["__cb_0_1"]=function(){'use strict'; var $ = require('fe:widget/js/base/jquery.js'); var fixreferrer = require('fe:widget/js/base/fixreferrer.js'); HAO.https && fixreferrer.init($(document)); };</script> <script>BigPipe.hooks["__cb_0_2"]=function(){'use strict'; var $ = require('fe:widget/js/base/jquery.js');$('div[data-hook="sitemap"]').on('mouseenter', function (e) 
{$(this).addClass('sitemap-hover');}).on('mouseleave', function (e) {$(this).removeClass('sitemap-hover');});};</script> <script>BigPipe.hooks["__cb_0_3"]=function(){'use strict'; var $ = require('fe:widget/js/base/jquery.js');var Search = require('fe:widget/js/base/search.js');var headerSearchInstance = new Search($('form[data-hook="search-form"]'));};</script> <script>BigPipe.hooks["__cb_0_4"]=function(){'use strict'; var $ = require('fe:widget/js/base/jquery.js');var events = require('fe:widget/js/lib/events.js');var login = require('fe:widget/js/base/login.js');var sethome = require('fe:widget/js/base/sethome.js');var $loginCon = $('div[data-hook="c-header-login"]');var $loginDrop = $('div[js-hook="popup-list"]');login.init();events.on('loginSuccess', function(userinfo) {$loginCon.addClass('success');$loginCon.find('.key .word').html(userinfo.userName);/* if ($loginCon.find('.key .word').width() >= 60) {$loginCon.find('.key .word').width(50);$loginDrop.outerWidth($loginCon.outerWidth());}*/$('[data-hook=login]').removeAttr('data-hook');});$loginCon.mouseenter(function() {if($(this).hasClass('success')) {$(this).addClass('hover');}}).mouseleave(function() {$(this).removeClass('hover');});$('div[data-hhok="qrcode"]').on('mouseenter', function () {$(this).children('div').show();}).on('mouseleave', function () {$(this).children('div').hide();}).on('click', function (ev) {if ($(this).children('div').length > 0) {return false;}});if($('[data-hook=setHome]').length) {sethome.init();}};</script> <script>BigPipe.hooks["__cb_0_5"]=function(){'use strict'; var $ = require('fe:widget/js/base/jquery.js');var popupWidth;$('div[data-hook="nav-more"]').on('mouseenter', function () {popupWidth = $(this).children('div').width();$(this).addClass('nav-more-hover');}).on('mouseleave', function () {$(this).removeClass('nav-more-hover');});};</script> <script>BigPipe.hooks["__cb_0_6"]=function(){'use strict'; var $ = require('fe:widget/js/base/jquery.js');var $v2Header = 
$('#erjiV2Header');var $fixedNav = $('#fixedNav');if ($v2Header.hasClass('v2-fixed') && !($.browser.msie && $.browser.version < 7)) {var offHeight = 0;$(window).scroll(function () {offHeight = $v2Header.offset().top + 60;if ($(window).scrollTop() >= offHeight) {if (!$fixedNav.hasClass('nav-v2-fixed')) {$fixedNav.addClass('nav-v2-fixed').find('li.cur').removeClass('cur').addClass('cur');}}else if ($fixedNav.hasClass('nav-v2-fixed')) {$fixedNav.removeClass('nav-v2-fixed').find('li.cur').removeClass('cur').addClass('cur');}});}};</script> <script>BigPipe.hooks["__cb_0_7"]=function(){'use strict'; var $ = require('fe:widget/js/base/jquery.js');var Slider = require('fe:widget/js/util/slider.js');new Slider($('.slider'));};</script> <script>BigPipe.hooks["__cb_0_8"]=function(){'use strict'; if(typeof BAIDU_SS_HHRUN!='function'){var d=document;(d.getElementsByTagName('head')[0]||d.body).appendChild(d.createElement('script')).src='http://su.bdimg.com/static/dspui/js/ls.js?v='+~(-new Date()/5600e5)}else{BAIDU_SS_HHRUN()}};</script> <script>BigPipe.hooks["__cb_0_9"]=function(){'use strict'; var lifttop = require('tuijian:widget/lift/lifttop.js');lifttop();};</script> <script>BigPipe.hooks["__cb_0_10"]=function(){'use strict'; window._bd_share_config = { common : { bdText : '', bdDesc : '', bdUrl : '', bdPic : '' }, share : { "bdSize" : 24 }, selectShare : [{ "bdselectMiniList" : ['tsina','weixin','qzone'] }] }; (document.getElementsByTagName('head')[0]||document.body) .appendChild(document.createElement('script')).src='http://bdimg.share.baidu.com/static/api/js/share.js?v=89860593.js?cdnversion='+~(-new Date()); var shareEvent = require('tuijian:widget/index/content/shareEvent.js'); shareEvent(); };</script> <script>BigPipe.hooks["__cb_0_11"]=function(){'use strict'; var addBookmark = require('fe:widget/js/base/addbookmark.js');addBookmark.init();};</script> <script>BigPipe.hooks["__cb_0_12"]=function(){'use strict'; (function initTrack(o){ var d = document; var x = 
d.createElement("script"); x.src = HAO.httpsTrans('http://s0.hao123img.com/res/js/track.js') + '?'+~(new Date/36e5); var a=[]; if(o){ for(var i in o){ a.push(i + ":" + (o[i])) } var config = a.join(";"); x.setAttribute("data-log-config", config); var s = d.getElementsByTagName("script")[0].parentNode; var p= s || d.head; if(p) { setTimeout(function() { p.appendChild(x) }, 0); } } })({ pageId: window.pageId, page: window.pageId, level: 2, vp: window.pageVP || window.pageId, aid: window.aid || '' }); window.js_track_loaded = function (success) { if (success) { window.js_track_loaded = null; if (window.aid) { /* globals Monkey */ Monkey && Monkey.set && Monkey.set('aid', window.aid); } } }; // 跨站资源统计 /* (function (doc) { var s = doc.createElement('script'); s.src = HAO.httpsTrans('http://s0.hao123img.com/res/js/fe/cspalog.js') + '?t=' + (+new Date); var parent = doc.getElementsByTagName('script')[0].parentNode; parent.appendChild(s); })(document); */ };</script> <script>BigPipe.hooks["__cb_0_13"]=function(){'use strict'; require.defer(["fe:widget/js/base/jquery.js","fe:widget/js/base/detect.js","tuijian:widget/index/kuaixun.js"], function ($, detect, kuaixun) { $(document).ready(function() { detect(); kuaixun.init(); }); }); };</script> 
<script>BigPipe.setResourceMap({"d8b3cc9ac_29e3":{"src":"http:\/\/s1.hao123img.com\/resource\/fe\/pkg\/aio-eef856ab5.231bb088c.css","type":"css","deps":[],"mods":["fe:resource\/css\/base.less"]},"38645dd_f7dd":{"src":"http:\/\/s2.hao123img.com\/resource\/tuijian\/css\/hotrank.38645dd.css","type":"css","deps":[],"mods":["tuijian:resource\/css\/hotrank.less"]},"8d1d978b0_a316":{"src":"http:\/\/s1.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/header.8d1d978b0.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/header.less"]},"6cca09af6_f07f":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/logo\/logo.6cca09af6.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/logo\/logo.less"]},"a0832ac19_fb25":{"src":"http:\/\/s1.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/sitemap\/sitemap.a0832ac19.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/sitemap\/sitemap.less"]},"25330c25d_ce62":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/adv\/adv.25330c25d.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/adv\/adv.less"]},"deba0d4c0_c8fe":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/form\/form.deba0d4c0.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/form\/form.less"]},"1c81d5fc6_a695":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/tools\/tools.1c81d5fc6.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/tools\/tools.less"]},"0c7877e81_8719":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/nav\/nav.0c7877e81.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/v2\/nav\/nav.less"]},"6e9548c75_e646":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/widget\/ui\/header\/common\/v2\/tuiguang\/tuiguang.6e9548c75.css","type":"css","deps":[],"mods":["fe:wi
dget\/ui\/header\/common\/v2\/tuiguang\/tuiguang.less"]},"38645dd_0f3e":{"src":"http:\/\/s0.hao123img.com\/resource\/tuijian\/widget\/index\/hotrank\/hotrank.38645dd.css","type":"css","deps":[],"mods":["tuijian:widget\/index\/hotrank\/hotrank.less"]},"3f6d691_9321":{"src":"http:\/\/s2.hao123img.com\/resource\/tuijian\/widget\/index\/hotrank\/index\/slider\/slider.3f6d691.css","type":"css","deps":[],"mods":["tuijian:widget\/index\/hotrank\/index\/slider\/slider.less"]},"4d7a174_ccfc":{"src":"http:\/\/s0.hao123img.com\/resource\/tuijian\/widget\/index\/hotrank\/common\/slider\/slider.4d7a174.css","type":"css","deps":[],"mods":["tuijian:widget\/index\/hotrank\/common\/slider\/slider.less"]},"9e71d5b_bed3":{"src":"http:\/\/s0.hao123img.com\/resource\/tuijian\/widget\/index\/hotrank\/index\/news\/news.9e71d5b.css","type":"css","deps":[],"mods":["tuijian:widget\/index\/hotrank\/index\/news\/news.less"]},"b016c1d_d1a3":{"src":"http:\/\/s1.hao123img.com\/resource\/tuijian\/widget\/index\/hotrank\/index\/fyb\/fyb.b016c1d.css","type":"css","deps":[],"mods":["tuijian:widget\/index\/hotrank\/index\/fyb\/fyb.less"]},"e073b71_9403":{"src":"http:\/\/s0.hao123img.com\/resource\/tuijian\/widget\/index\/hotrank\/index\/top\/top.e073b71.css","type":"css","deps":[],"mods":["tuijian:widget\/index\/hotrank\/index\/top\/top.less"]},"77f7c66_45f3":{"src":"http:\/\/s0.hao123img.com\/resource\/tuijian\/widget\/lift\/lift.77f7c66.css","type":"css","deps":[],"mods":["tuijian:widget\/lift\/lift.less"]},"95a138325_0731":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/pkg\/aio-8155b5719.3dd99d32e.css","type":"css","deps":[],"mods":["fe:widget\/ui\/footer\/common\/footer.less"]},"ed29b1dff_99f2":{"src":"http:\/\/s1.hao123img.com\/resource\/fe\/pkg\/aio-752ba7752.ed29b1dff.js","type":"js","deps":[],"mods":["fe:widget\/js\/base\/jquery.js"]},"499abaa0e_acda":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/pkg\/aio-eef856ab5.499abaa0e.js","type":"js","deps":["ed29b1dff_99f2","15f327f0a_5d72"],"mods
":["fe:widget\/js\/base\/browser.js","fe:widget\/js\/base\/fixreferrer.js"]},"15f327f0a_5d72":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/pkg\/aio-95cc3013d.15f327f0a.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["fe:widget\/js\/base\/cookie.js"]},"331938377_b942":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/pkg\/aio-1c2d6f9f2.2b182a527.css","type":"css","deps":[],"mods":["fe:widget\/ui\/header\/common\/header.less"]},"2009b1512_46d0":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/pkg\/aio-1c2d6f9f2.2009b1512.js","type":"js","deps":["ed29b1dff_99f2","15f327f0a_5d72","331938377_b942"],"mods":["fe:widget\/js\/base\/sethome.js","fe:widget\/js\/lib\/events.js","fe:widget\/js\/base\/login.js","fe:widget\/js\/third\/arttemplate\/template-native.js","fe:widget\/js\/base\/autocomplete.js","fe:widget\/js\/base\/search.js","fe:widget\/ui\/header\/common\/header.js"]},"9a092a7f1_2a6f":{"src":"http:\/\/s0.hao123img.com\/resource\/fe\/widget\/js\/util\/slider.9a092a7f1.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["fe:widget\/js\/util\/slider.js"]},"f271c78_c7d7":{"src":"http:\/\/s0.hao123img.com\/resource\/tuijian\/widget\/lift\/lifttop.f271c78.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["tuijian:widget\/lift\/lifttop.js"]},"4d39d64_93de":{"src":"http:\/\/s1.hao123img.com\/resource\/tuijian\/widget\/index\/content\/shareEvent.4d39d64.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["tuijian:widget\/index\/content\/shareEvent.js"]},"3ac67f28c_b365":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/pkg\/aio-8155b5719.3ac67f28c.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["fe:widget\/js\/base\/addbookmark.js"]},"67402ee5d_d72b":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/widget\/js\/base\/track.67402ee5d.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["fe:widget\/js\/base\/track.js"]},"f97e9ecfd_31c5":{"src":"http:\/\/s1.hao123img.com\/resource\/fe\/widget\/js\/base\/detect.f97e9ecfd.js","type":"js","deps":["67402ee5d_d72b"],"mods":["fe:widget\
/js\/base\/detect.js"]},"2e29525_fe44":{"src":"http:\/\/s1.hao123img.com\/resource\/tuijian\/widget\/index\/kuaixun.2e29525.js","type":"js","deps":["ed29b1dff_99f2"],"mods":["tuijian:widget\/index\/kuaixun.js"]},"5a7c104a8_7959":{"src":"http:\/\/s2.hao123img.com\/resource\/fe\/js\/lib\/main.5a7c104a8.js","type":"js","deps":[],"mods":["fe:resource\/js\/lib\/main.js"]}});</script> <script>BigPipe.onPageletArrive({"id":null,"children":[],"renderMode":"default","parent":null,"deps":{"beforedisplay":["d8b3cc9ac_29e3","38645dd_f7dd","8d1d978b0_a316","6cca09af6_f07f","a0832ac19_fb25","25330c25d_ce62","deba0d4c0_c8fe","1c81d5fc6_a695","0c7877e81_8719","6e9548c75_e646","38645dd_0f3e","3f6d691_9321","4d7a174_ccfc","9e71d5b_bed3","b016c1d_d1a3","e073b71_9403","77f7c66_45f3","95a138325_0731"],"load":["ed29b1dff_99f2","499abaa0e_acda","2009b1512_46d0","9a092a7f1_2a6f","f271c78_c7d7","4d39d64_93de","3ac67f28c_b365"]},"hooks":{"load":["__cb_0_1","__cb_0_2","__cb_0_3","__cb_0_4","__cb_0_5","__cb_0_6","__cb_0_7","__cb_0_8","__cb_0_9","__cb_0_10","__cb_0_11","__cb_0_12","__cb_0_13"]}});</script> </body></html><!--24343361510346110218060803--> <script> var _trace_page_logid = 2434336151; </script>