【问题标题】:Is there a way to add a script to add new functions in the evaluate() context of chrome+puppeteer? 有没有办法在 chrome+puppeteer 的 evaluate() 上下文中添加脚本来添加新函数?
【发布时间】:2018-07-06 16:17:03
【问题描述】:

基于这个回答,有没有办法(就像在 casperjs/phantomjs 中可以做到的那样)在 page.evaluate() 上下文中添加我们的自定义函数?

例如,包含一个带有帮助函数 x 的文件以调用 Xpath 函数:x('//a/@href')

【问题讨论】:

    标签: javascript google-chrome xpath puppeteer


    【解决方案1】:

    您可以在一次单独的 page.evaluate() 调用中注册辅助函数。page.exposeFunction() 看起来很诱人,但通过它暴露的函数无法访问浏览器上下文(而这里需要用到 document 对象)。

    下面是一个注册辅助函数 $x() 的例子:

    const puppeteer = require('puppeteer');
    
    /**
     * Installs the `$x` XPath helper on `window`.
     *
     * The body only touches browser globals (`window`, `document`,
     * `XPathResult`) so that it can be handed to `page.evaluate()` and run
     * inside the page rather than in Node.
     */
    const helperFunctions = () => {
        /**
         * Evaluates an XPath expression against the whole document.
         *
         * @param {string} xPath - XPath expression to evaluate.
         * @returns {Node|null} The `singleNodeValue` of the XPath result.
         */
        window.$x = (xPath) => {
            const lookup = document.evaluate(
                xPath,
                document,
                null,
                XPathResult.FIRST_ORDERED_NODE_TYPE,
                null
            );
            return lookup.singleNodeValue;
        };
    };
    
    // Opens Wikipedia in a fresh browser, registers the helpers inside the
    // page, and prints the text of the element with id "mp-tfa".
    (async () => {
        const browser = await puppeteer.launch();
        try {
            const page = await browser.newPage();
            await page.goto('https://en.wikipedia.org', { waitUntil: 'networkidle2' });

            // Register the helpers after navigation so they exist in the loaded document.
            await page.evaluate(helperFunctions);

            const text = await page.evaluate(() => {
                // $x() is now available
                const featureArticle = $x('//*[@id="mp-tfa"]');

                // $x() returns null when nothing matches; report that as null
                // instead of throwing inside the page.
                return featureArticle ? featureArticle.textContent : null;
            });
            console.log(text);
        } finally {
            // Close the browser even when a step above throws, so the
            // launched browser process is not left running.
            await browser.close();
        }
    })().catch((error) => {
        // Surface the failure instead of leaving an unhandled rejection.
        console.error(error);
        process.exitCode = 1;
    });
    

    (编辑 - 从文件中添加助手)

    您还可以将助手保存在单独的文件中,并通过page.addScriptTag() 将其注入浏览器上下文。 这是一个例子:

    helperFunctions.js

    /**
     * XPath lookup helper exposed on `window`.
     *
     * @param {string} xPath - XPath expression, evaluated against `document`.
     * @returns {Node|null} The `singleNodeValue` of the XPath result.
     */
    window.$x = (xPath) => {
        const lookup = document.evaluate(
            xPath,
            document,
            null,
            XPathResult.FIRST_ORDERED_NODE_TYPE,
            null
        );
        return lookup.singleNodeValue;
    };
    

    并使用它:

    const puppeteer = require('puppeteer');
    
    // Opens Wikipedia in a fresh browser, injects ./helperFunctions.js into
    // the page, and prints the text of the element with id "mp-tfa".
    (async () => {
        const browser = await puppeteer.launch();
        try {
            const page = await browser.newPage();
            await page.goto('https://en.wikipedia.org', { waitUntil: 'networkidle2' });

            // Inject the helper file after navigation so $x() exists in the loaded document.
            await page.addScriptTag({ path: './helperFunctions.js' });

            const text = await page.evaluate(() => {
                // $x() is now available
                const featureArticle = $x('//*[@id="mp-tfa"]');

                // $x() returns null when nothing matches; report that as null
                // instead of throwing inside the page.
                return featureArticle ? featureArticle.textContent : null;
            });
            console.log(text);
        } finally {
            // Close the browser even when a step above throws, so the
            // launched browser process is not left running.
            await browser.close();
        }
    })().catch((error) => {
        // Surface the failure instead of leaving an unhandled rejection.
        console.error(error);
        process.exitCode = 1;
    });
    

    【讨论】:

    • 很好,但我们可以从文件中加载helperFunctions 吗?
    • @wam 是的,我已经用解释编辑了答案。
    【解决方案2】:

    基于 getElementByXPath() 和 getElementsByXPath() 的另一种解决方案。优点是我们可以通过第二个参数,针对特定节点求值 XPath 表达式。

    /**
     * XPath lookup helper exposed on `window`.
     *
     * Accepts the same optional `scope` argument as getElementByXPath() and
     * getElementsByXPath(), so relative expressions can be evaluated against a
     * specific node. Calls with a single argument behave as before.
     *
     * @param {string} xPath - XPath expression to evaluate.
     * @param {Node} [scope] - Context node; any falsy value falls back to `document`.
     * @returns {Node|null} The `singleNodeValue` of the XPath result.
     */
    window.$x = (xPath, scope) => document
        .evaluate(
            xPath,
            scope || document,
            null,
            XPathResult.FIRST_ORDERED_NODE_TYPE,
            null
        )
        .singleNodeValue;
    
    /**
     * Returns the first node matched by an XPath expression.
     *
     * @param {string} expression - XPath expression to evaluate.
     * @param {Node} [scope] - Context node; any falsy value falls back to `document`.
     * @returns {Node|undefined} The first snapshot item, or `undefined` when
     *     the snapshot is empty.
     */
    window.getElementByXPath = function getElementByXPath(expression, scope) {
        const contextNode = scope ? scope : document;
        const snapshot = document.evaluate(
            expression,
            contextNode,
            null,
            XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
            null
        );
        return snapshot.snapshotLength > 0 ? snapshot.snapshotItem(0) : undefined;
    };
    
    /**
     * Collects every node matched by an XPath expression into a plain array.
     *
     * @param {string} expression - XPath expression to evaluate.
     * @param {Node} [scope] - Context node; any falsy value falls back to `document`.
     * @returns {Node[]} The snapshot items in snapshot order; empty when
     *     nothing matches.
     */
    window.getElementsByXPath = function getElementsByXPath(expression, scope) {
        const contextNode = scope ? scope : document;
        const snapshot = document.evaluate(
            expression,
            contextNode,
            null,
            XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
            null
        );
        // Copy the snapshot into a real array, one item per index.
        return Array.from(
            { length: snapshot.snapshotLength },
            (_, index) => snapshot.snapshotItem(index)
        );
    };
    

    现实生活中的代码示例:

    const puppeteer = require('puppeteer');
    
    // Scrapes up to 100 rows of the "t99btc-rich-list" table and prints them
    // as JSON keyed by row index (1-based; row 0 is skipped).
    (async () => {

        const browser = await puppeteer.launch();
        try {
            const page = await browser.newPage();

            await page.goto('https://99bitcoins.com/bitcoin-rich-list-top100/#addresses', { waitUntil: 'networkidle2' });
            // Inject the XPath helpers after navigation so they exist in the loaded document.
            await page.addScriptTag({ path: './helperFunctions.js' });

            const result = await page.evaluate(() => {
                const obj = {};
                const data = getElementsByXPath('//table[@class="t99btc-rich-list"]//tr');
                // Never index past the rows actually present: with a missing
                // row, getElementByXPath() would fall back to `document`,
                // find nothing, and `.innerText` would throw.
                const lastRow = Math.min(100, data.length - 1);
                for (let i = 1; i <= lastRow; i++) {
                    const hashCell = getElementByXPath('./td/a', data[i]);
                    const balanceCell = getElementByXPath('./td[3]', data[i]);
                    // Skip rows that lack either cell instead of aborting the whole scrape.
                    if (!hashCell || !balanceCell) {
                        continue;
                    }
                    obj[i] = {
                        "hash": hashCell.innerText,
                        "balance": balanceCell.innerText
                    };
                }

                return obj;

            });
            console.log(JSON.stringify(result, null, 4));
        } finally {
            // Close the browser even when a step above throws, so the
            // launched browser process is not left running.
            await browser.close();
        }

    })().catch((error) => {
        // Surface the failure instead of leaving an unhandled rejection.
        console.error(error);
        process.exitCode = 1;
    });
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2010-10-26
      • 1970-01-01
      • 2022-07-22
      • 2023-03-16
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多