【发布时间】:2017-12-12 12:08:56
【问题描述】:
我有一组 URL,我希望 PhantomJS 使用 async.map() 访问并将它们并行保存为 html。鉴于我必须关闭打开的页面以释放 RAM,我相信我需要使用 page.close()。我这样做了。
但是,我也想在 Phantom 完成后退出,我尝试通过 async.waterfall() 实现此添加,但它立即存在。
我怎样才能做到这一点?
var fs = require("fs");
var async = require("async");
var urls = [
{"url": "https://www.google.com", "html": "google"},
{"url": "http://yahoo.com", "html": "yahoo"}
];
async.waterfall([
function (callback2) {
async.map(urls, function (a, callback) {
var resourceWait = 300,
maxRenderWait = 5000,
url = a.url;
var page = require('webpage').create(),
count = 0,
forcedRenderTimeout,
renderTimeout;
page.viewportSize = {width: 1440, height: 900};
function doRender() {
var content = page.content;
var path = '../public/html/' + a.html + '.html';
fs.write(path, content, 'w');
page.close();
}
page.onResourceRequested = function (req) {
count += 1;
clearTimeout(renderTimeout);
};
page.onResourceReceived = function (res) {
if (!res.stage || res.stage === 'end') {
count -= 1;
if (count === 0) {
renderTimeout = setTimeout(doRender, resourceWait);
}
}
};
page.open(url, function (status) {
if (status !== "success") {
console.log('Unable to load url');
} else {
forcedRenderTimeout = setTimeout(function () {
doRender();
}, maxRenderWait);
}
});
callback();
});
callback2(null, 'done!');
}
], function (err, result) {
phantom.exit();
});
【问题讨论】:
标签: asynchronous phantomjs async.js