【发布时间】:2018-07-06 16:17:03
【问题描述】:
基于这个回答,有没有办法(就像在 casperjs/phantomjs 中那样)在 page.evaluate() 上下文中添加我们自定义的函数?
例如,引入一个包含辅助函数 x 的文件,用它来执行 XPath 查询:x('//a/@href')
【问题讨论】:
标签: javascript google-chrome xpath puppeteer
基于这个回答,有没有办法(就像在 casperjs/phantomjs 中那样)在 page.evaluate() 上下文中添加我们自定义的函数?
例如,引入一个包含辅助函数 x 的文件,用它来执行 XPath 查询:x('//a/@href')
【问题讨论】:
标签: javascript google-chrome xpath puppeteer
您可以在一次单独的 page.evaluate() 调用中注册辅助函数。page.exposeFunction() 看起来很诱人,但通过它暴露的函数是在 Node.js 端执行的,无法访问浏览器上下文(而这里需要用到 document 对象)。
这里是一个使用$x()注册辅助函数的例子:
const puppeteer = require('puppeteer');
/**
 * Installs the `$x` XPath helper on `window`.
 *
 * `$x(xPath)` evaluates the expression against the whole document and
 * returns the `singleNodeValue` of a FIRST_ORDERED_NODE_TYPE result.
 * The function body only touches page globals (`window`, `document`,
 * `XPathResult`), so it can be handed to `page.evaluate()` as-is.
 */
const helperFunctions = () => {
  window.$x = (xPath) => {
    const firstMatch = document.evaluate(
      xPath,
      document,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
      null,
    );
    return firstMatch.singleNodeValue;
  };
};
// Opens Wikipedia in headless Chrome, registers the $x() helper inside the
// page and prints the text of the element with id "mp-tfa".
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://en.wikipedia.org', { waitUntil: 'networkidle2' });
    // Registered after goto() so the helper lands on the loaded document's window.
    await page.evaluate(helperFunctions);
    const text = await page.evaluate(() => {
      // $x() is now available
      const featureArticle = $x('//*[@id="mp-tfa"]');
      // $x() yields null when nothing matches; report that as null instead of
      // throwing a TypeError inside the page.
      return featureArticle ? featureArticle.textContent : null;
    });
    console.log(text);
  } finally {
    // Close the browser even when a step above throws, so no Chromium
    // process is left running.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
(编辑 - 从文件中添加助手)
您还可以将辅助函数保存在单独的文件中,并通过 page.addScriptTag() 将其注入浏览器上下文。
这是一个例子:
helperFunctions.js
// Page-side helper: evaluates an XPath expression against the whole document
// and returns the result's singleNodeValue (first match in document order).
window.$x = (xPath) => {
  const result = document.evaluate(
    xPath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null,
  );
  return result.singleNodeValue;
};
并使用它:
const puppeteer = require('puppeteer');
// Opens Wikipedia in headless Chrome, injects ./helperFunctions.js into the
// page and prints the text of the element with id "mp-tfa".
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://en.wikipedia.org', { waitUntil: 'networkidle2' });
    // Injected after goto() so the script runs in the loaded document.
    await page.addScriptTag({ path: './helperFunctions.js' });
    const text = await page.evaluate(() => {
      // $x() is now available
      const featureArticle = $x('//*[@id="mp-tfa"]');
      // $x() yields null when nothing matches; report that as null instead of
      // throwing a TypeError inside the page.
      return featureArticle ? featureArticle.textContent : null;
    });
    console.log(text);
  } finally {
    // Close the browser even when a step above throws, so no Chromium
    // process is left running.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
【讨论】:
helperFunctions 吗?
另一种解决方案,基于 casperjs 的 getElementByXPath() 和 getElementsByXPath()。优点是可以通过第二个参数,把 XPath 表达式限定在某个特定节点上求值。
// Shorthand for a document-wide XPath lookup that yields a single node:
// the singleNodeValue of a FIRST_ORDERED_NODE_TYPE evaluation.
window.$x = (xPath) => {
  const { singleNodeValue } = document.evaluate(
    xPath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null,
  );
  return singleNodeValue;
};
/**
 * Returns the first node matching an XPath expression.
 *
 * @param {string} expression - XPath expression to evaluate.
 * @param {Node} [scope] - Context node the expression is evaluated against;
 *   any falsy value falls back to `document`.
 * @returns {Node|undefined} The first match in document order, or `undefined`
 *   when nothing matches.
 */
window.getElementByXPath = function getElementByXPath(expression, scope) {
  const contextNode = scope || document;
  // Only one node is needed, so ask for FIRST_ORDERED_NODE_TYPE instead of
  // building a snapshot of every match and reading item 0.
  const match = document.evaluate(
    expression,
    contextNode,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null,
  ).singleNodeValue;
  // singleNodeValue is null on a miss; keep returning undefined so existing
  // callers observe the same value as before.
  return match === null ? undefined : match;
};
/**
 * Collects every node matching an XPath expression into a plain array.
 *
 * @param {string} expression - XPath expression to evaluate.
 * @param {Node} [scope] - Context node the expression is evaluated against;
 *   any falsy value falls back to `document`.
 * @returns {Node[]} The items of an ORDERED_NODE_SNAPSHOT_TYPE result, in
 *   snapshot order; empty when nothing matches.
 */
window.getElementsByXPath = function getElementsByXPath(expression, scope) {
  const contextNode = scope || document;
  const snapshot = document.evaluate(
    expression,
    contextNode,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null,
  );
  return Array.from(
    { length: snapshot.snapshotLength },
    (_, index) => snapshot.snapshotItem(index),
  );
};
现实生活中的代码示例:
const puppeteer = require('puppeteer');
// Loads the 99bitcoins rich-list page, injects ./helperFunctions.js and prints
// up to 100 table rows as JSON, keyed by row index:
// { "<i>": { hash, balance }, ... }.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://99bitcoins.com/bitcoin-rich-list-top100/#addresses', { waitUntil: 'networkidle2' });
    await page.addScriptTag({ path: './helperFunctions.js' });
    const result = await page.evaluate(() => {
      const obj = {};
      const data = getElementsByXPath('//table[@class="t99btc-rich-list"]//tr');
      // Row 0 is skipped, as before (presumably the header row — confirm
      // against the page). Never index past the rows actually found: the old
      // fixed 1..100 loop threw a TypeError on shorter tables.
      const lastRow = Math.min(100, data.length - 1);
      for (let i = 1; i <= lastRow; i++) {
        const hashCell = getElementByXPath('./td/a', data[i]);
        const balanceCell = getElementByXPath('./td[3]', data[i]);
        // Rows without both cells are skipped rather than aborting the
        // whole scrape.
        if (!hashCell || !balanceCell) {
          continue;
        }
        obj[i] = {
          "hash": hashCell.innerText,
          "balance": balanceCell.innerText
        };
      }
      return obj;
    });
    console.log(JSON.stringify(result, null, 4));
  } finally {
    // Close the browser even when a step above throws, so no Chromium
    // process is left running.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
【讨论】: