【发布时间】:2021-06-28 17:36:06
【问题描述】:
更新:console.log(foodGroupIdsOnPage1) 无论在 for 循环外部还是内部都有效。所以我认为问题是剧作家(节点包)无法评估“id”变量。
我为一页html做了一个数据抓取器,它成功地抓取了数据。代码如下。
const playwright = require('playwright');
async function initialize() { //nodeJS
const browser = await playwright.chromium.launch({
headless: false
});
const context = await browser.newContext();
const tab1 = await context.newPage();
return { tab1, context };
}
async function main() {
const { tab1, context } = await initialize();
await tab1.goto('https://www.cfs.gov.hk/tc_chi/nutrient/search1.php');
await tab1.waitForFunction(async function () {
const foodGroupIdsOnPage1 = document.querySelector('.tableResponsive').querySelectorAll('td')
const page1_foodGroupButton = foodGroupIdsOnPage1[0].querySelector('a')
page1_foodGroupButton.click();
});
await tab1.waitForTimeout(2000);
await tab1.click('[id^=grp] > a');
await tab1.waitForTimeout(2000);
const ArrayOfTabs = context.pages();
let tab2 = ArrayOfTabs[1];
await tab2.evaluate(function () {
let tableOfAllFoods = []
let rowsOnPage3 = document.querySelector(".colorTable2").querySelectorAll("tr");
for (let row_OnPage3 of rowsOnPage3) {
let arrayNutritionOfOneFood = []
let cellsInOneRow = row_OnPage3.querySelectorAll("td");
for (let cell of cellsInOneRow) {
arrayNutritionOfOneFood.push(cell.innerText)
}
tableOfAllFoods.push(arrayNutritionOfOneFood);
}
console.log(tableOfAllFoods)
});
tab2.close();
tab1.goBack();
}
main();
但是,我想添加一个 for 循环,以便它可以成为一个多页刮板。并且代码中断。第 32 行的变量“id”未定义。而且我不知道它未定义的原因。
const playwright = require('playwright');
async function initialize() {
const browser = await playwright.chromium.launch({
headless: false
});
const context = await browser.newContext(); ////create a new browser context, which means no cookies and cache saved
const tab1 = await context.newPage();
return { tab1, context };
}
async function main() {
const { tab1, context } = await initialize();
await tab1.goto('https://www.cfs.gov.hk/tc_chi/nutrient/search1.php');
////const page1_foodGroupsLinksArray = document.querySelector('.tableResponsive').querySelectorAll('td');
// Node.js (Playwright) Context
const foodGroupIdsOnPage1 = await tab1.evaluate(async function getFoodGroupsOnPage1() {
return [...document.querySelector('.tableResponsive').querySelectorAll('td ')].map(e => e.id);
})
// Node.js context
for (let id of foodGroupIdsOnPage1) {
await tab1.waitForFunction(async function GotoPage2() { ////wait for function>>>only run the next command after all the commands inside the next bracket run
// Browser Context
// for (let page1_foodGroupLink of page1_foodGroupsLinksArray) {
const page1_foodGroupButton = document.querySelector(id).querySelector('a') ////beginning of the for loop
await page1_foodGroupButton.click();
});
////console.log(id);
await tab1.waitForTimeout(2000);
await tab1.click('[id^=grp] > a');
await tab1.waitForTimeout(2000);
const ArrayOfTabs = context.pages(); ////get how many tabs chromium are
let tab2 = ArrayOfTabs[1]; ////make active tab to the second tab
await tab2.evaluate(function extractFoodGroupData() {
let tableOfAllFoods = [];
let rowsOnPage3 = document.querySelector(".colorTable2").querySelectorAll("tr");
for (let row_OnPage3 of rowsOnPage3) {
let arrayNutritionOfOneFood = [];
let cellsInOneRow = row_OnPage3.querySelectorAll("td");
for (let cell of cellsInOneRow) {
arrayNutritionOfOneFood.push(cell.innerText);
}
////arrayNutritionOfOneFood.push(group_id);
////group_id=group_id+1;
tableOfAllFoods.push(arrayNutritionOfOneFood);
}
console.log(tableOfAllFoods);
});
tab2.close();
tab1.goBack();
////}
////},[id])
}
};
main();
谁能告诉我如何修复它以及它破裂的原因。提前感谢您的帮助!
【问题讨论】:
-
应该是
document.querySelector('#' + id)你需要#前缀来表示选择器是一个ID。 -
谢谢巴马尔。但终端返回此消息: (node:18792) UnhandledPromiseRejectionWarning: page.waitForFunction: Evaluation failed: ReferenceError: id is not defined
-
错误发生在哪一行?
-
" for (let id of foodGroupIdsOnPage1) {" 这一行
-
这毫无意义。那是定义变量的行。只有在引用变量时才会发生错误。真的是第 32 行吗?
标签: javascript node.js for-loop playwright queryselector