在对 PhantomJS 源代码进行了大量试验、调整和逆向工程之后,再加上它已不再维护,我改用了无头 Chrome(自 58 版起提供),并搭配 Node.js 驱动脚本。它可以正确截取使用 WOFF 字体的网站的屏幕截图。
这是我为感兴趣的人准备的设置。
安装 Node.js 和 NPM
# Install the epel-release package first, then Node.js and npm via yum.
yum install epel-release
yum install nodejs
node --version # to confirm successful install
yum install npm
# OR, for Node.js 8.x, run the NodeSource setup script below instead
# curl -sL https://rpm.nodesource.com/setup_8.x | bash -
安装 Node.js 模块
# Install the two modules that screenshot.js loads via require().
npm install chrome-remote-interface --no-bin-links --save
npm install minimist --no-bin-links --save
在 CentOS 上安装 Chrome
# Download the current stable Chrome RPM into /tmp and install it with yum.
cd /tmp
wget https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm
yum -y localinstall google-chrome-*
google-chrome --version # to confirm successful install
Node.js 截图驱动脚本
将此脚本另存为 screenshot.js。该脚本最初出自 schnerd 之手。我已经修改了自己的版本以使其更灵活,但为了感谢原作者 schnerd,下面按其原始形式转载:
// Captures a screenshot of a web page through a Chrome instance that is
// already running with remote debugging enabled, and writes it to disk.
//
// CLI options (all optional):
//   --url             page to capture            (default https://www.google.com)
//   --format          'jpeg' or 'png'            (default png)
//   --viewportWidth   viewport width in pixels   (default 1440)
//   --viewportHeight  viewport height in pixels  (default 900)
//   --delay           milliseconds to wait after the load event (default 0)
//   --userAgent       user agent string override
//   --full            capture the full height of <body>
//   --outFile         output file path           (default output.png)
const CDP = require('chrome-remote-interface');
const argv = require('minimist')(process.argv.slice(2));
const file = require('fs');

// CLI Args
const url = argv.url || 'https://www.google.com';
const format = argv.format === 'jpeg' ? 'jpeg' : 'png';
const viewportWidth = argv.viewportWidth || 1440;
const viewportHeight = argv.viewportHeight || 900;
const delay = argv.delay || 0;
const userAgent = argv.userAgent;
const fullPage = argv.full;
// Honour --outFile; fall back to the previously hard-coded name.
const outFile = argv.outFile || 'output.png';

// Start the Chrome Debugging Protocol
CDP(async function(client) {
  // Extract used DevTools domains.
  const {DOM, Emulation, Network, Page} = client;

  try {
    // Enable events on domains we are interested in.
    await Page.enable();
    await DOM.enable();
    await Network.enable();

    // If user agent override was specified, pass to Network domain
    if (userAgent) {
      await Network.setUserAgentOverride({userAgent});
    }

    // Set up viewport resolution, etc.
    const deviceMetrics = {
      width: viewportWidth,
      height: viewportHeight,
      deviceScaleFactor: 0,
      mobile: false,
      fitWindow: false,
    };
    await Emulation.setDeviceMetricsOverride(deviceMetrics);
    await Emulation.setVisibleSize({width: viewportWidth, height: viewportHeight});

    // Subscribe to the load event BEFORE navigating so that a page which
    // finishes loading quickly cannot fire the event before we listen for it.
    const pageLoaded = new Promise(resolve => Page.loadEventFired(resolve));

    // Navigate to target page and wait for its load event
    await Page.navigate({url});
    await pageLoaded;

    // If the `full` CLI option was passed, we need to measure the height of
    // the rendered page and use Emulation.setVisibleSize
    if (fullPage) {
      const {root: {nodeId: documentNodeId}} = await DOM.getDocument();
      const {nodeId: bodyNodeId} = await DOM.querySelector({
        selector: 'body',
        nodeId: documentNodeId,
      });
      const {model: {height}} = await DOM.getBoxModel({nodeId: bodyNodeId});
      await Emulation.setVisibleSize({width: viewportWidth, height: height});
      // This forceViewport call ensures that content outside the viewport is
      // rendered, otherwise it shows up as grey. Possibly a bug?
      await Emulation.forceViewport({x: 0, y: 0, scale: 1});
    }

    // Give the page the requested settling time before capturing.
    await new Promise(resolve => setTimeout(resolve, delay));

    const screenshot = await Page.captureScreenshot({format});
    // Buffer.from replaces the deprecated `new Buffer(...)` constructor.
    const buffer = Buffer.from(screenshot.data, 'base64');
    await new Promise((resolve, reject) => {
      file.writeFile(outFile, buffer, err => (err ? reject(err) : resolve()));
    });
    console.log('Screenshot saved');
  } catch (err) {
    // Report any protocol or file-system failure instead of leaving an
    // unhandled promise rejection.
    console.error(err);
    process.exitCode = 1;
  } finally {
    // Always release the DevTools connection, even on failure.
    client.close();
  }
}).on('error', err => {
  console.error('Cannot connect to browser:', err);
  process.exitCode = 1;
});
将 Chrome 作为后台进程运行
# Launch headless Chrome in the background with remote debugging exposed on port 9222.
nohup google-chrome --headless --hide-scrollbars --remote-debugging-port=9222 --disable-gpu &
注意:--disable-gpu 目前是必需的,请参阅here
截图
# Capture the demo page as a JPEG with a 1440x900 viewport, waiting 1000 ms after page load.
node screenshot.js --url="http://castellsonclaret.com/public/external/georgiapro/demo.htm" --outFile="screenshot.png" --format="jpeg" --viewportWidth=1440 --viewportHeight=900 --delay=1000
结果
WOFF 演示:
浏览器功能测试: