【发布时间】:2020-08-17 14:49:54
【问题描述】:
我正在尝试遍历页面上的多个元素并在每个 div 中获取一些数据....但是它看起来不像 xpathpage.$x(<query>) 支持上下文节点。要么,要么我没有正确指定上下文节点:
const puppeteer = require('puppeteer');
async function scrape(url) {
console.log('scraping', url);
const res = [];
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(url);
const deals = await page.$x('//*[@class="a-row dealContainer dealTile"]/div/div[2]/div');
// const dealEls = await page.evaluate((...deals) => {
// return deals.map(el => el);
// }, ...deals);
console.log(deals.length, 'found');
for (let deal of deals) {
const [imgEl] = await deal.$x('//*[@id="dealImage"]/div/div/div[1]/img');
const src = await imgEl.getProperty('src');
const img = await src.jsonValue();
const [titleEl] = await deal.$x('//*[@id="dealTitle"]/span');
const text = await titleEl.getProperty('textContent');
const title = await text.jsonValue();
const [priceEl] = await deal.$x('//*[@id="101_dealView_0"]/div/div[2]/div/div/div[3]/div[1]/span');
const priceTxt = await priceEl.getProperty('textContent');
const price = await priceTxt.jsonValue();
//
const [origPriceEl] = await deal.$x('//*[@id="101_dealView_0"]/div/div[2]/div/div/div[3]/div[2]/span[2]');
const origTxt = await origPriceEl.getProperty('textContent');
const origPrice = await origTxt.jsonValue();
res.push({img, title: title.trim(), price, origPrice});
}
await browser.close();
console.log(res);
}
scrape('https://www.amazon.com/gp/goldbox/ref=gbps_ftr_s-5_111b_prm_ISPM?gb_f_deals1=dealTypes:DEAL_OF_THE_DAY%252CBEST_DEAL%252CLIGHTNING_DEAL,sortOrder:BY_SCORE,dealStates:AVAILABLE%252CWAITLIST%252CWAITLISTFULL,includedAccessTypes:GIVEAWAY_DEAL,enforcedCategories:172282,discountRanges:10-25%252C25-50%252C50-70,minRating:3,primeEligibleOnly:true&pf_rd_p=18c48e87-0e4c-4250-8975-2c9a7587111b&pf_rd_s=slot-5&pf_rd_t=701&pf_rd_i=gb_main&pf_rd_m=ATVPDKIKX0DER&pf_rd_r=8A7XXQ3N68HFWAMNKHZF&ie=UTF8')
【问题讨论】: