【发布时间】:2023-03-20 15:16:01
【问题描述】:
我正在开发一个 Express Web 应用程序,该应用程序在第一次加载页面时运行 JavaScript 抓取代码。
这是 Node.js 网页抓取代码(scrape.js):
const request = require('request-promise');
const cheerio = require('cheerio');
const fs = require('fs');
const data = require('../public/state_data.json');
const cases_data = require('../public/cases_data.json');
// Retrieve the Wikipedia page and append one record per matching state to cases_data.
// Runs immediately when this file is loaded. On a successful response, rows 3..58 of
// the '.wikitable' rows are scanned; each row whose state name equals an entry in
// `data` pushes { state, latitude, longitude, cases, percapita } onto `cases_data`.
// The updated array is then written to disk once.
request('https://en.wikipedia.org/wiki/2020_coronavirus_pandemic_in_the_United_States', (error, response, html) => {
  if (error || response.statusCode !== 200) {
    console.log('request error');
    return;
  }

  // create cheerio scraper
  const $ = cheerio.load(html);
  // find all the rows in the table
  const rows = $('.wikitable').find('tr');
  let appended = 0;

  for (let i = 3; i < 59; i++) {
    // scrape state name and cases from table
    const state = $(rows[i]).children('th:nth-child(2)').text().split("\n");
    const cellHtml = $(rows[i]).children('td').html();
    if (cellHtml == null) {
      // Row is missing or has no <td> cell; skip it instead of throwing on null.
      continue;
    }
    // Strip EVERY thousands separator: a string pattern in replace() removes only
    // the first comma, so "1,234,567" would otherwise be parsed as 1234.
    const cases = Number.parseInt(cellHtml.replace(/,/g, ''), 10);

    // No early exit: every entry in `data` with the same state name gets a record.
    for (const entry of data) {
      if (entry.state === state[0]) {
        // push new data to cases_data
        cases_data.push({
          state: state[0],
          latitude: entry.latitude,
          longitude: entry.longitude,
          cases: cases,
          percapita: (cases / entry.population)
        });
        appended += 1;
      }
    }
  }

  if (appended > 0) {
    // Write once after the loop rather than once per match: overlapping writeFile
    // calls on the same file are not safe and the last one to finish wins.
    // NOTE(review): fs resolves this relative path against process.cwd(), whereas
    // require() resolves relative to this file — confirm both refer to the same file.
    fs.writeFile('../public/cases_data.json', JSON.stringify(cases_data, null, 2), function(err) {
      if (err) throw err;
    });
  }
});
这里是 express 应用 (app.js):
// Express entry point: registers a GET handler for "/" and starts listening on `port`.
const express = require('express');
const app = express();
const port = 3000;
const scrape = require('./scrape.js');

// Handles GET "/": invokes the scraper, then renders the "index" view.
function handleIndex(req, res) {
  scrape();
  res.render('index');
}

// Called once the server is listening; logs the local URL.
function announceListening() {
  console.log(`Example app listening at http://localhost:${port}`);
}

app.get('/', handleIndex);
app.listen(port, announceListening);
现在当我运行“node app.js”时,我得到了一个错误:
TypeError: scrape is not a function
我尝试将 scrape.js 包装在一个函数中,但无济于事。有什么想法吗?
修复/解决方案:
scrape.js 没有给 module.exports 赋值,因此 require('./scrape.js') 返回的是一个空对象而不是函数。我必须把请求逻辑包装成函数并导出,如下代码所示:
module.exports = () => {
request('https://en.wikipedia.org/wiki/2020_coronavirus_pandemic_in_the_United_States', (error, response, html)) => {
... remaining code ...
}
}
【问题讨论】:
标签: javascript node.js express web-scraping