【问题标题】:Not able to read the dom content in puppeteer [node.js] using class & objects无法使用类和对象读取 puppeteer [node.js] 中的 dom 内容
【发布时间】:2020-09-09 14:45:44
【问题描述】:

我正在尝试使用类和对象读取来自indian superleague 的 dom 内容,例如目标、攻击、每个目标的分钟数等。它给出了这样的错误

评估失败:TypeError:无法读取未定义的属性“textContent”puppeteer_evaluation_script:7:64

这是代码

config.js

const puppeteer = require('puppeteer')

class Puppeteer{
    constructor(){
        this.param = {
            path: 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe',
            url: 'https://indiansuperleague.com',
        }   
    }

    async connect(){
        this.param.browser = await puppeteer.launch({executablePath: this.param.path, headless: false})
        this.param.page = await this.param.browser.newPage()
        await this.param.page.goto(this.param.url, {timeout: 0})
    }

    async disconnect(){
        await this.param.browser.close()
    }
}

module.exports = Puppeteer

states.js

class States{
    constructor(param){
        this.param = param
    }

    async fetchData(){
        const page = this.param.page

        const res = await page.evaluate(() => {
            const title = 'si-fkt-sctn-title', value = 'si-fkt-sctn-number'
            // const titleArray = document.getElementsByClassName(title)
            // const valueArray = document.getElementsByClassName(value)

            let key = document.getElementsByClassName(title)[0].textContent.trim()
            let num = document.getElementsByClassName(value)[0].textContent.trim()

            /* for(let i=0; i<titleArray.length; i++){
                key[i] = titleArray[i].textContent.trim()
                num[i] = valueArray[i].textContent.trim()
                // Object.defineProperty(temp, key, {value:num,writable: true,configurable: true,enumerable: true})
            } */

            return {key, num}
        })

        console.log(res)
    }

}

module.exports = States

app.js

const Puppeteer = require('./config')
const States = require('./modules/states')

const puppeteer = new Puppeteer()
const states = new States(puppeteer.param)

puppeteer.connect().then(async() => {    
    let res = await states.fetchData()
    console.log(res)
    
    await puppeteer.disconnect()
}).catch(e => console.log(e))

解决办法是什么?

【问题讨论】:

    标签: javascript node.js web-scraping puppeteer webautomation


    【解决方案1】:

    元素可能会在一段时间后动态创建。在从它们检索数据之前,您可以尝试使用page.waitForSelector()。例如:

    'use strict';
    
    const puppeteer = require('puppeteer');
    
    (async function main() {
      try {
        const browser = await puppeteer.launch();
        const [page] = await browser.pages();
    
        await page.goto('https://indiansuperleague.com');
    
        await Promise.all([
          page.waitForSelector('.si-fkt-sctn-title'),
          page.waitForSelector('.si-fkt-sctn-number'),
        ]);
    
        const data = await page.evaluate(() => {
          return [
            document.querySelector('.si-fkt-sctn-title').textContent,
            document.querySelector('.si-fkt-sctn-number').textContent,
          ];
        });
    
        console.log(data);
    
        await browser.close();
      } catch (err) {
        console.error(err);
      }
    })();
    

    输出:

    [ ' Goals', ' 63' ]
    

    【讨论】:

      猜你喜欢
      • 2019-03-25
      • 1970-01-01
      • 1970-01-01
      • 2011-09-11
      • 2021-09-07
      • 2011-12-07
      • 1970-01-01
      • 1970-01-01
      • 2013-06-20
      相关资源
      最近更新 更多