【发布时间】:2020-11-30 12:22:28
【问题描述】:
我的程序应该在名为 suplexed.com 的网站上搜索给定的运动鞋代码,并返回该运动鞋可用的最小尺码的价格。当我使用 Node.js 在控制台中单独运行 .js 文件时,确实能得到我想要的值。但是,当我运行引用该 .js 文件的 HTML 文件时,出现以下错误:
Unhandled Promise Rejection: ReferenceError: Can't find variable: require
这是我的 JavaScript 代码,文件名为“scrapers.js”。我添加了注释以便更好地理解。使用 Node.js 在控制台中运行该 .js 文件时没有问题,但通过 HTML 用 live-server 运行该目录时就不起作用了。问题出在前几行代码,因为执行不会越过那里。
/**
 * Waits for the node matched by `xpath` and returns its `textContent`.
 *
 * @param {object} page - Puppeteer page to query.
 * @param {string} xpath - XPath expression selecting a single node.
 * @returns {Promise<string>} The text content of the first matching node.
 */
async function readTextByXPath(page, xpath) {
  await page.waitForXPath(xpath);
  const [element] = await page.$x(xpath);
  const textHandle = await element.getProperty('textContent');
  return textHandle.jsonValue();
}

/**
 * Opens `url` in a Puppeteer-controlled browser, reads the size and price from
 * the fourth row of the `#pricer_table` table, logs the formatted result and,
 * when a DOM is available, appends it to the page body as a paragraph.
 *
 * Puppeteer is a Node library, so this function needs `require` and cannot run
 * as a plain browser script.
 *
 * @param {string} url - Address of the sneaker page to scrape.
 * @returns {Promise<void>} Resolves once the browser has been closed.
 */
async function scrapeStockx(url) {
  const site = "stockx.com";
  // Keep using the puppeteer-extra instance: reassigning to plain `puppeteer`
  // would launch a browser without the stealth plugin registered above.
  const puppeteer = require('puppeteer-extra');
  const pluginStealth = require('puppeteer-extra-plugin-stealth');
  puppeteer.use(pluginStealth());

  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'networkidle0' });

    // get the value for the row sneaker size for stockx
    const sneakerSizeRawTxt = await readTextByXPath(
      page,
      '//*[@id="pricer_table"]/tbody/tr[4]/td[1]'
    );
    // get the value for the row sneaker size price for stockx
    const sneakerPriceRawTxt = await readTextByXPath(
      page,
      '//*[@id="pricer_table"]/tbody/tr[4]/td[2]/span[1]'
    );

    const message = printSizeAndPrice(sneakerSizeRawTxt, sneakerPriceRawTxt, site);

    // print the sneaker size and price to verify TO CONSOLE
    console.log(message);

    // print the sneaker size and price TO WEBSITE (only when a DOM exists;
    // under plain Node there is no `document`)
    if (typeof document !== 'undefined') {
      const sneakerPrint = document.createElement("p");
      sneakerPrint.textContent = message;
      document.querySelector("body").appendChild(sneakerPrint);
    }
  } finally {
    // Always release the browser, even when navigation or scraping fails.
    await browser.close();
  }
}
// Submit handler for the search form: builds the sneaker page address from a
// sneaker code and starts the scrape for it.
document.querySelector("#search-form").addEventListener("submit", function (e) {
  // Stop the browser's default form submission so the handler can run in place.
  e.preventDefault();

  // TODO: read the code from the textbox instead of the placeholder below:
  //   e.target.elements.enterBox.value
  // manually enter a unique sneaker code
  const sneakerCode = "XXXXXX-XXX";
  // print sneaker code to console to verify
  console.log(`sneakerCode: ${sneakerCode}`);

  // create the website address of the sneaker; the code is encoded so that
  // user-entered text cannot alter the URL structure
  const address = `https://suplexed.com/web/${encodeURIComponent(sneakerCode)}`;
  // print website address to verify
  console.log(`address: ${address}`);

  // call the function to retrieve the wanted values for the chosen sneaker;
  // handle rejection explicitly instead of leaving the promise floating
  scrapeStockx(address).catch(function (err) {
    console.error("Failed to scrape sneaker price:", err);
  });
});
/**
 * Builds the one-line summary "<size> is <price> on <site>".
 *
 * @param {*} sneakerSizeRawTxt - Size text as scraped from the page.
 * @param {*} sneakerPriceRawTxt - Price text as scraped from the page.
 * @param {*} site - Name of the site the values belong to.
 * @returns {string} The formatted summary line.
 */
function printSizeAndPrice(sneakerSizeRawTxt, sneakerPriceRawTxt, site) {
  // String.prototype.concat stringifies each argument the same way a
  // template literal does.
  const summary = "".concat(
    sneakerSizeRawTxt,
    " is ",
    sneakerPriceRawTxt,
    " on ",
    site
  );
  return summary;
}
这是我的html代码
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Sneaker price search</title>
</head>
<body>
<p>Enter a sneaker code and hit the button to search</p>
<!-- The id and the input name are the hooks scrapers.js relies on. -->
<form id="search-form">
<input type="text" placeholder="XXXXXX-XXX" name="enterBox" aria-label="Sneaker code">
<button type="submit">Search</button>
</form>
<!-- Loaded at the end of body so #search-form already exists when the script looks it up. -->
<script src="scrapers.js"></script>
</body>
</html>
【问题讨论】:
-
puppeteer 无法在浏览器中运行,它是一个 Node.js 库。
-
@pguardiario 那么你有什么替代网络抓取的方法吗?
-
您需要把抓取代码放到后端(例如 Express?)执行,由前端把请求提交给后端,在那里进行抓取。
标签: javascript html web-scraping puppeteer require