【发布时间】:2021-11-18 17:26:09
【问题描述】:
我想对 url https://data.anbima.com.br/debentures/AGRU12/agenda... 中的主表格正文中的所有数据执行网络 scraping... 但是,由于它实现了分页,因此我无法轻松完成。 ..我想出了以下代码,但它不起作用...我收到错误ReferenceError: list is not defined,尽管我在while循环之前定义了它...
const puppeteer = require('puppeteer');
const fs = require('fs');
(async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(`https://data.anbima.com.br/debentures/AGRU12/agenda`);
await page.waitForSelector('.normal-text');
var list = [];
while (true) {
let nextButton;
await page.evaluate(async () => {
const nodeList = document.querySelectorAll(
'.anbima-ui-table > tbody > tr'
);
let nodeArray = [...nodeList];
nextButton = document.querySelector('.anbima-ui-pagination__next-button');
let listA = nodeArray
.map((tbody) => [...tbody.children].map((td) => [...td.children]))
.map((tr) =>
tr.map((span) =>
span[0].innerHTML
.replace('<label class="flag__children">', '')
.replace('</label>', '')
)
);
list.push(listA);
});
if (!nextButton) {
break;
} else {
await page.goto(nextButton.href);
}
}
fs.writeFile('eventDates.json', JSON.stringify(list[0], null, 2), (err) => {
if (err) throw new Error('Something went wrong');
console.log('well done you got the dates');
});
await browser.close();
})();
【问题讨论】:
标签: javascript while-loop puppeteer goto recurrence